]> git.proxmox.com Git - ceph.git/blame - ceph/src/erasure-code/isa/ErasureCodeIsa.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / erasure-code / isa / ErasureCodeIsa.cc
CommitLineData
7c673cae
FG
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2014 CERN (Switzerland)
5 *
6 * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 */
14
15// -----------------------------------------------------------------------------
16#include <algorithm>
9f95a23c 17#include <cerrno>
7c673cae
FG
18// -----------------------------------------------------------------------------
19#include "common/debug.h"
20#include "ErasureCodeIsa.h"
21#include "xor_op.h"
11fdf7f2 22#include "include/ceph_assert.h"
31f18b77 23using namespace std;
9f95a23c 24using namespace ceph;
31f18b77 25
7c673cae
FG
26// -----------------------------------------------------------------------------
27extern "C" {
28#include "isa-l/include/erasure_code.h"
29}
30// -----------------------------------------------------------------------------
31#define dout_context g_ceph_context
32#define dout_subsys ceph_subsys_osd
33#undef dout_prefix
34#define dout_prefix _prefix(_dout)
35// -----------------------------------------------------------------------------
36
37// -----------------------------------------------------------------------------
38
39static ostream&
40_prefix(std::ostream* _dout)
41{
42 return *_dout << "ErasureCodeIsa: ";
43}
44// -----------------------------------------------------------------------------
45
46const std::string ErasureCodeIsaDefault::DEFAULT_K("7");
47const std::string ErasureCodeIsaDefault::DEFAULT_M("3");
48
7c673cae
FG
49
50// -----------------------------------------------------------------------------
51
52int
53ErasureCodeIsa::init(ErasureCodeProfile &profile, ostream *ss)
54{
55 int err = 0;
7c673cae
FG
56 err |= parse(profile, ss);
57 if (err)
58 return err;
59 prepare();
224ce89b 60 return ErasureCode::init(profile, ss);
7c673cae
FG
61}
62
63// -----------------------------------------------------------------------------
64
65unsigned int
66ErasureCodeIsa::get_chunk_size(unsigned int object_size) const
67{
68 unsigned alignment = get_alignment();
69 unsigned chunk_size = ( object_size + k - 1 ) / k;
70 dout(20) << "get_chunk_size: chunk_size " << chunk_size
71 << " must be modulo " << alignment << dendl;
72 unsigned modulo = chunk_size % alignment;
73 if (modulo) {
74 dout(10) << "get_chunk_size: " << chunk_size
75 << " padded to " << chunk_size + alignment - modulo << dendl;
76 chunk_size += alignment - modulo;
77 }
78 return chunk_size;
79}
80
81// -----------------------------------------------------------------------------
82
83int ErasureCodeIsa::encode_chunks(const set<int> &want_to_encode,
84 map<int, bufferlist> *encoded)
85{
86 char *chunks[k + m];
87 for (int i = 0; i < k + m; i++)
88 chunks[i] = (*encoded)[i].c_str();
89 isa_encode(&chunks[0], &chunks[k], (*encoded)[0].length());
90 return 0;
91}
92
93int ErasureCodeIsa::decode_chunks(const set<int> &want_to_read,
94 const map<int, bufferlist> &chunks,
95 map<int, bufferlist> *decoded)
96{
97 unsigned blocksize = (*chunks.begin()).second.length();
98 int erasures[k + m + 1];
99 int erasures_count = 0;
100 char *data[k];
101 char *coding[m];
102 for (int i = 0; i < k + m; i++) {
103 if (chunks.find(i) == chunks.end()) {
104 erasures[erasures_count] = i;
105 erasures_count++;
106 }
107 if (i < k)
108 data[i] = (*decoded)[i].c_str();
109 else
110 coding[i - k] = (*decoded)[i].c_str();
111 }
112 erasures[erasures_count] = -1;
11fdf7f2 113 ceph_assert(erasures_count > 0);
7c673cae
FG
114 return isa_decode(erasures, data, coding, blocksize);
115}
116
117// -----------------------------------------------------------------------------
118
119void
120ErasureCodeIsaDefault::isa_encode(char **data,
121 char **coding,
122 int blocksize)
123{
124
125 if (m == 1)
126 // single parity stripe
127 region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize);
128 else
129 ec_encode_data(blocksize, k, m, encode_tbls,
130 (unsigned char**) data, (unsigned char**) coding);
131}
132
133// -----------------------------------------------------------------------------
134
135bool
136ErasureCodeIsaDefault::erasure_contains(int *erasures, int i)
137{
138 for (int l = 0; erasures[l] != -1; l++) {
139 if (erasures[l] == i)
140 return true;
141 }
142 return false;
143}
144
145// -----------------------------------------------------------------------------
146
147
148
149// -----------------------------------------------------------------------------
150
151int
152ErasureCodeIsaDefault::isa_decode(int *erasures,
153 char **data,
154 char **coding,
155 int blocksize)
156{
157 int nerrs = 0;
158 int i, r, s;
159
160 // count the errors
161 for (int l = 0; erasures[l] != -1; l++) {
162 nerrs++;
163 }
164
165 unsigned char *recover_source[k];
166 unsigned char *recover_target[m];
167
168 memset(recover_source, 0, sizeof (recover_source));
169 memset(recover_target, 0, sizeof (recover_target));
170
171 // ---------------------------------------------
172 // Assign source and target buffers
173 // ---------------------------------------------
174 for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
175 if (!erasure_contains(erasures, i)) {
176 if (r < k) {
177 if (i < k) {
178 recover_source[r] = (unsigned char*) data[i];
179 } else {
180 recover_source[r] = (unsigned char*) coding[i - k];
181 }
182 r++;
183 }
184 } else {
185 if (s < m) {
186 if (i < k) {
187 recover_target[s] = (unsigned char*) data[i];
188 } else {
189 recover_target[s] = (unsigned char*) coding[i - k];
190 }
191 s++;
192 }
193 }
194 }
195
196 if (m == 1) {
197 // single parity decoding
11fdf7f2 198 ceph_assert(1 == nerrs);
7c673cae
FG
199 dout(20) << "isa_decode: reconstruct using region xor [" <<
200 erasures[0] << "]" << dendl;
201 region_xor(recover_source, recover_target[0], k, blocksize);
202 return 0;
203 }
204
205
206 if ((matrixtype == kVandermonde) &&
207 (nerrs == 1) &&
208 (erasures[0] < (k + 1))) {
209 // use xor decoding if a data chunk is missing or the first coding chunk
210 dout(20) << "isa_decode: reconstruct using region xor [" <<
211 erasures[0] << "]" << dendl;
11fdf7f2
TL
212 ceph_assert(1 == s);
213 ceph_assert(k == r);
7c673cae
FG
214 region_xor(recover_source, recover_target[0], k, blocksize);
215 return 0;
216 }
217
218 unsigned char d[k * (m + k)];
219 unsigned char decode_tbls[k * (m + k)*32];
220 unsigned char *p_tbls = decode_tbls;
221
222 int decode_index[k];
223
224 if (nerrs > m)
225 return -1;
226
227 std::string erasure_signature; // describes a matrix configuration for caching
228
229 // ---------------------------------------------
230 // Construct b by removing error rows
231 // ---------------------------------------------
232
233 for (i = 0, r = 0; i < k; i++, r++) {
234 char id[128];
235 while (erasure_contains(erasures, r))
236 r++;
237
238 decode_index[i] = r;
239
240 snprintf(id, sizeof (id), "+%d", r);
241 erasure_signature += id;
242 }
243
244 for (int p = 0; p < nerrs; p++) {
245 char id[128];
246 snprintf(id, sizeof (id), "-%d", erasures[p]);
247 erasure_signature += id;
248 }
249
250 // ---------------------------------------------
251 // Try to get an already computed matrix
252 // ---------------------------------------------
253 if (!tcache.getDecodingTableFromCache(erasure_signature, p_tbls, matrixtype, k, m)) {
254 int j;
255 unsigned char b[k * (m + k)];
256 unsigned char c[k * (m + k)];
257
258 for (i = 0; i < k; i++) {
259 r = decode_index[i];
260 for (j = 0; j < k; j++)
261 b[k * i + j] = encode_coeff[k * r + j];
262 }
263 // ---------------------------------------------
264 // Compute inverted matrix
265 // ---------------------------------------------
266
267 // --------------------------------------------------------
268 // Remark: this may fail for certain Vandermonde matrices !
269 // There is an advanced way trying to use different
270 // source chunks to get an invertible matrix, however
271 // there are also (k,m) combinations which cannot be
272 // inverted when m chunks are lost and this optimizations
273 // does not help. Therefor we keep the code simpler.
274 // --------------------------------------------------------
275 if (gf_invert_matrix(b, d, k) < 0) {
276 dout(0) << "isa_decode: bad matrix" << dendl;
277 return -1;
278 }
279
280 for (int p = 0; p < nerrs; p++) {
281 if (erasures[p] < k) {
282 // decoding matrix elements for data chunks
283 for (j = 0; j < k; j++) {
284 c[k * p + j] = d[k * erasures[p] + j];
285 }
286 } else {
287 // decoding matrix element for coding chunks
288 for (i = 0; i < k; i++) {
289 int s = 0;
290 for (j = 0; j < k; j++)
291 s ^= gf_mul(d[j * k + i],
292 encode_coeff[k * erasures[p] + j]);
293
294 c[k * p + i] = s;
295 }
296 }
297 }
298
299 // ---------------------------------------------
300 // Initialize Decoding Table
301 // ---------------------------------------------
302 ec_init_tables(k, nerrs, c, decode_tbls);
303 tcache.putDecodingTableToCache(erasure_signature, p_tbls, matrixtype, k, m);
304 }
305 // Recover data sources
306 ec_encode_data(blocksize,
307 k, nerrs, decode_tbls, recover_source, recover_target);
308
309
310 return 0;
311}
312
313// -----------------------------------------------------------------------------
314
315unsigned
316ErasureCodeIsaDefault::get_alignment() const
317{
318 return EC_ISA_ADDRESS_ALIGNMENT;
319}
320
321// -----------------------------------------------------------------------------
322
323int ErasureCodeIsaDefault::parse(ErasureCodeProfile &profile,
324 ostream *ss)
325{
326 int err = ErasureCode::parse(profile, ss);
327 err |= to_int("k", profile, &k, DEFAULT_K, ss);
328 err |= to_int("m", profile, &m, DEFAULT_M, ss);
11fdf7f2 329 err |= sanity_check_k_m(k, m, ss);
7c673cae
FG
330
331 if (matrixtype == kVandermonde) {
332 // these are verified safe values evaluated using the
333 // benchmarktool and 10*(combinatoric for maximum loss) random
334 // full erasures
335 if (k > 32) {
336 *ss << "Vandermonde: m=" << m
337 << " should be less/equal than 32 : revert to k=32" << std::endl;
338 k = 32;
339 err = -EINVAL;
340 }
341
342 if (m > 4) {
343 *ss << "Vandermonde: m=" << m
344 << " should be less than 5 to guarantee an MDS codec:"
345 << " revert to m=4" << std::endl;
346 m = 4;
347 err = -EINVAL;
348 }
349 switch (m) {
350 case 4:
351 if (k > 21) {
352 *ss << "Vandermonde: k=" << k
353 << " should be less than 22 to guarantee an MDS"
354 << " codec with m=4: revert to k=21" << std::endl;
355 k = 21;
356 err = -EINVAL;
357 }
358 break;
359 default:
360 ;
361 }
362 }
363 return err;
364}
365
366// -----------------------------------------------------------------------------
367
368void
369ErasureCodeIsaDefault::prepare()
370{
371 // setup shared encoding table and coefficients
372 unsigned char** p_enc_table =
373 tcache.getEncodingTable(matrixtype, k, m);
374
375 unsigned char** p_enc_coeff =
376 tcache.getEncodingCoefficient(matrixtype, k, m);
377
378 if (!*p_enc_coeff) {
379 dout(10) << "[ cache tables ] creating coeff for k=" <<
380 k << " m=" << m << dendl;
381 // build encoding coefficients which need to be computed once for each (k,m)
382 encode_coeff = (unsigned char*) malloc(k * (m + k));
383
384 if (matrixtype == kVandermonde)
385 gf_gen_rs_matrix(encode_coeff, k + m, k);
386 if (matrixtype == kCauchy)
387 gf_gen_cauchy1_matrix(encode_coeff, k + m, k);
388
389 // either our new created coefficients are stored or if they have been
390 // created in the meanwhile the locally allocated coefficients will be
391 // freed by setEncodingCoefficient
392 encode_coeff = tcache.setEncodingCoefficient(matrixtype, k, m, encode_coeff);
393 } else {
394 encode_coeff = *p_enc_coeff;
395 }
396
397 if (!*p_enc_table) {
398 dout(10) << "[ cache tables ] creating tables for k=" <<
399 k << " m=" << m << dendl;
400 // build encoding table which needs to be computed once for each (k,m)
401 encode_tbls = (unsigned char*) malloc(k * (m + k)*32);
402 ec_init_tables(k, m, &encode_coeff[k * k], encode_tbls);
403
404 // either our new created table is stored or if it has been
405 // created in the meanwhile the locally allocated table will be
406 // freed by setEncodingTable
407 encode_tbls = tcache.setEncodingTable(matrixtype, k, m, encode_tbls);
408 } else {
409 encode_tbls = *p_enc_table;
410 }
411
412 unsigned memory_lru_cache =
413 k * (m + k) * 32 * tcache.decoding_tables_lru_length;
414
415 dout(10) << "[ cache memory ] = " << memory_lru_cache << " bytes" <<
416 " [ matrix ] = " <<
417 ((matrixtype == kVandermonde) ? "Vandermonde" : "Cauchy") << dendl;
418
11fdf7f2 419 ceph_assert((matrixtype == kVandermonde) || (matrixtype == kCauchy));
7c673cae
FG
420
421}
422// -----------------------------------------------------------------------------