]> git.proxmox.com Git - ceph.git/blob - ceph/src/erasure-code/lrc/ErasureCodeLrc.cc
bea861f1adee50b18d744ac0c8dfcb2f0b7ba89f
[ceph.git] / ceph / src / erasure-code / lrc / ErasureCodeLrc.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 */
17
18 #include <cerrno>
19 #include <algorithm>
20
21 #include "include/str_map.h"
22 #include "common/debug.h"
23 #include "crush/CrushWrapper.h"
24 #include "osd/osd_types.h"
25 #include "include/stringify.h"
26 #include "erasure-code/ErasureCodePlugin.h"
27 #include "json_spirit/json_spirit_writer.h"
28
29 #include "ErasureCodeLrc.h"
30
31 #define dout_context g_ceph_context
32 #define dout_subsys ceph_subsys_osd
33 #undef dout_prefix
34 #define dout_prefix _prefix(_dout)
35
36 using namespace std;
37 using namespace ceph;
38
39 static ostream& _prefix(std::ostream* _dout)
40 {
41 return *_dout << "ErasureCodeLrc: ";
42 }
43
44 int ErasureCodeLrc::create_rule(const string &name,
45 CrushWrapper &crush,
46 ostream *ss) const
47 {
48 if (crush.rule_exists(name)) {
49 *ss << "rule " << name << " exists";
50 return -EEXIST;
51 }
52 if (!crush.name_exists(rule_root)) {
53 *ss << "root item " << rule_root << " does not exist";
54 return -ENOENT;
55 }
56 int root = crush.get_item_id(rule_root);
57 if (rule_device_class.size()) {
58 if (!crush.class_exists(rule_device_class)) {
59 *ss << "device class " << rule_device_class << " does not exist";
60 return -ENOENT;
61 }
62 int c = crush.get_class_id(rule_device_class);
63 if (crush.class_bucket.count(root) == 0 ||
64 crush.class_bucket[root].count(c) == 0) {
65 *ss << "root item " << rule_root << " has no devices with class "
66 << rule_device_class;
67 return -EINVAL;
68 }
69 root = crush.class_bucket[root][c];
70 }
71
72 int rno = 0;
73 for (rno = 0; rno < crush.get_max_rules(); rno++) {
74 if (!crush.rule_exists(rno))
75 break;
76 }
77
78 int steps = 4 + rule_steps.size();
79 int ret;
80 ret = crush.add_rule(rno, steps, pg_pool_t::TYPE_ERASURE);
81 ceph_assert(ret == rno);
82 int step = 0;
83
84 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
85 ceph_assert(ret == 0);
86 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
87 ceph_assert(ret == 0);
88 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_TAKE, root, 0);
89 ceph_assert(ret == 0);
90 // [ [ "choose", "rack", 2 ],
91 // [ "chooseleaf", "host", 5 ] ]
92 for (vector<Step>::const_iterator i = rule_steps.begin();
93 i != rule_steps.end();
94 ++i) {
95 int op = i->op == "chooseleaf" ?
96 CRUSH_RULE_CHOOSELEAF_INDEP : CRUSH_RULE_CHOOSE_INDEP;
97 int type = crush.get_type_id(i->type);
98 if (type < 0) {
99 *ss << "unknown crush type " << i->type;
100 return -EINVAL;
101 }
102 ret = crush.set_rule_step(rno, step++, op, i->n, type);
103 ceph_assert(ret == 0);
104 }
105 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_EMIT, 0, 0);
106 ceph_assert(ret == 0);
107 crush.set_rule_name(rno, name);
108 return rno;
109 }
110
111 int ErasureCodeLrc::layers_description(const ErasureCodeProfile &profile,
112 json_spirit::mArray *description,
113 ostream *ss) const
114 {
115 if (profile.count("layers") == 0) {
116 *ss << "could not find 'layers' in " << profile << std::endl;
117 return ERROR_LRC_DESCRIPTION;
118 }
119 string str = profile.find("layers")->second;
120 try {
121 json_spirit::mValue json;
122 json_spirit::read_or_throw(str, json);
123
124 if (json.type() != json_spirit::array_type) {
125 *ss << "layers='" << str
126 << "' must be a JSON array but is of type "
127 << json.type() << " instead" << std::endl;
128 return ERROR_LRC_ARRAY;
129 }
130 *description = json.get_array();
131 } catch (json_spirit::Error_position &e) {
132 *ss << "failed to parse layers='" << str << "'"
133 << " at line " << e.line_ << ", column " << e.column_
134 << " : " << e.reason_ << std::endl;
135 return ERROR_LRC_PARSE_JSON;
136 }
137 return 0;
138 }
139
140 int ErasureCodeLrc::layers_parse(const string &description_string,
141 json_spirit::mArray description,
142 ostream *ss)
143 {
144 int position = 0;
145 for (vector<json_spirit::mValue>::iterator i = description.begin();
146 i != description.end();
147 ++i, position++) {
148 if (i->type() != json_spirit::array_type) {
149 stringstream json_string;
150 json_spirit::write(*i, json_string);
151 *ss << "each element of the array "
152 << description_string << " must be a JSON array but "
153 << json_string.str() << " at position " << position
154 << " is of type " << i->type() << " instead" << std::endl;
155 return ERROR_LRC_ARRAY;
156 }
157 json_spirit::mArray layer_json = i->get_array();
158 ErasureCodeProfile profile;
159 int index = 0;
160 for (vector<json_spirit::mValue>::iterator j = layer_json.begin();
161 j != layer_json.end();
162 ++j, ++index) {
163 if (index == 0) {
164 if (j->type() != json_spirit::str_type) {
165 stringstream element;
166 json_spirit::write(*j, element);
167 *ss << "the first element of the entry "
168 << element.str() << " (first is zero) "
169 << position << " in " << description_string
170 << " is of type " << (*j).type() << " instead of string" << std::endl;
171 return ERROR_LRC_STR;
172 }
173 layers.push_back(Layer(j->get_str()));
174 Layer &layer = layers.back();
175 layer.chunks_map = j->get_str();
176 } else if(index == 1) {
177 Layer &layer = layers.back();
178 if (j->type() != json_spirit::str_type &&
179 j->type() != json_spirit::obj_type) {
180 stringstream element;
181 json_spirit::write(*j, element);
182 *ss << "the second element of the entry "
183 << element.str() << " (first is zero) "
184 << position << " in " << description_string
185 << " is of type " << (*j).type() << " instead of string or object"
186 << std::endl;
187 return ERROR_LRC_CONFIG_OPTIONS;
188 }
189 if (j->type() == json_spirit::str_type) {
190 int err = get_json_str_map(j->get_str(), *ss, &layer.profile);
191 if (err)
192 return err;
193 } else if (j->type() == json_spirit::obj_type) {
194 json_spirit::mObject o = j->get_obj();
195
196 for (map<string, json_spirit::mValue>::iterator i = o.begin();
197 i != o.end();
198 ++i) {
199 layer.profile[i->first] = i->second.get_str();
200 }
201 }
202 } else {
203 // ignore trailing elements
204 }
205 }
206 }
207 return 0;
208 }
209
210 int ErasureCodeLrc::layers_init(ostream *ss)
211 {
212 ErasureCodePluginRegistry &registry = ErasureCodePluginRegistry::instance();
213 for (unsigned int i = 0; i < layers.size(); i++) {
214 Layer &layer = layers[i];
215 int position = 0;
216 for(std::string::iterator it = layer.chunks_map.begin();
217 it != layer.chunks_map.end();
218 ++it) {
219 if (*it == 'D')
220 layer.data.push_back(position);
221 if (*it == 'c')
222 layer.coding.push_back(position);
223 if (*it == 'c' || *it == 'D')
224 layer.chunks_as_set.insert(position);
225 position++;
226 }
227 layer.chunks = layer.data;
228 layer.chunks.insert(layer.chunks.end(),
229 layer.coding.begin(), layer.coding.end());
230 if (layer.profile.find("k") == layer.profile.end())
231 layer.profile["k"] = stringify(layer.data.size());
232 if (layer.profile.find("m") == layer.profile.end())
233 layer.profile["m"] = stringify(layer.coding.size());
234 if (layer.profile.find("plugin") == layer.profile.end())
235 layer.profile["plugin"] = "jerasure";
236 if (layer.profile.find("technique") == layer.profile.end())
237 layer.profile["technique"] = "reed_sol_van";
238 int err = registry.factory(layer.profile["plugin"],
239 directory,
240 layer.profile,
241 &layer.erasure_code,
242 ss);
243 if (err)
244 return err;
245 }
246 return 0;
247 }
248
249 int ErasureCodeLrc::layers_sanity_checks(const string &description_string,
250 ostream *ss) const
251 {
252 int position = 0;
253
254 if (layers.size() < 1) {
255 *ss << "layers parameter has " << layers.size()
256 << " which is less than the minimum of one. "
257 << description_string << std::endl;
258 return ERROR_LRC_LAYERS_COUNT;
259 }
260 for (vector<Layer>::const_iterator layer = layers.begin();
261 layer != layers.end();
262 ++layer) {
263 if (chunk_count != layer->chunks_map.length()) {
264 *ss << "the first element of the array at position "
265 << position << " (starting from zero) "
266 << " is the string '" << layer->chunks_map
267 << " found in the layers parameter "
268 << description_string << ". It is expected to be "
269 << chunk_count << " characters long but is "
270 << layer->chunks_map.length() << " characters long instead "
271 << std::endl;
272 return ERROR_LRC_MAPPING_SIZE;
273 }
274 }
275 return 0;
276 }
277
278 int ErasureCodeLrc::parse(ErasureCodeProfile &profile,
279 ostream *ss)
280 {
281 int r = ErasureCode::parse(profile, ss);
282 if (r)
283 return r;
284
285 return parse_rule(profile, ss);
286 }
287
288 const string ErasureCodeLrc::DEFAULT_KML("-1");
289
290 int ErasureCodeLrc::parse_kml(ErasureCodeProfile &profile,
291 ostream *ss)
292 {
293 int err = ErasureCode::parse(profile, ss);
294 const int DEFAULT_INT = -1;
295 int k, m, l;
296 err |= to_int("k", profile, &k, DEFAULT_KML, ss);
297 err |= to_int("m", profile, &m, DEFAULT_KML, ss);
298 err |= to_int("l", profile, &l, DEFAULT_KML, ss);
299
300 if (k == DEFAULT_INT && m == DEFAULT_INT && l == DEFAULT_INT)
301 return err;
302
303 if ((k != DEFAULT_INT || m != DEFAULT_INT || l != DEFAULT_INT) &&
304 (k == DEFAULT_INT || m == DEFAULT_INT || l == DEFAULT_INT)) {
305 *ss << "All of k, m, l must be set or none of them in "
306 << profile << std::endl;
307 return ERROR_LRC_ALL_OR_NOTHING;
308 }
309
310 const char *generated[] = { "mapping",
311 "layers",
312 "crush-steps" };
313
314 for (int i = 0; i < 3; i++) {
315 if (profile.count(generated[i])) {
316 *ss << "The " << generated[i] << " parameter cannot be set "
317 << "when k, m, l are set in " << profile << std::endl;
318 return ERROR_LRC_GENERATED;
319 }
320 }
321
322 if (l == 0 || (k + m) % l) {
323 *ss << "k + m must be a multiple of l in "
324 << profile << std::endl;
325 return ERROR_LRC_K_M_MODULO;
326 }
327
328 int local_group_count = (k + m) / l;
329
330 if (k % local_group_count) {
331 *ss << "k must be a multiple of (k + m) / l in "
332 << profile << std::endl;
333 return ERROR_LRC_K_MODULO;
334 }
335
336 if (m % local_group_count) {
337 *ss << "m must be a multiple of (k + m) / l in "
338 << profile << std::endl;
339 return ERROR_LRC_M_MODULO;
340 }
341
342 string mapping;
343 for (int i = 0; i < local_group_count; i++) {
344 mapping += string(k / local_group_count, 'D') +
345 string(m / local_group_count, '_') + "_";
346 }
347 profile["mapping"] = mapping;
348
349 string layers = "[ ";
350
351 // global layer
352 layers += " [ \"";
353 for (int i = 0; i < local_group_count; i++) {
354 layers += string(k / local_group_count, 'D') +
355 string(m / local_group_count, 'c') + "_";
356 }
357 layers += "\", \"\" ],";
358
359 // local layers
360 for (int i = 0; i < local_group_count; i++) {
361 layers += " [ \"";
362 for (int j = 0; j < local_group_count; j++) {
363 if (i == j)
364 layers += string(l, 'D') + "c";
365 else
366 layers += string(l + 1, '_');
367 }
368 layers += "\", \"\" ],";
369 }
370 profile["layers"] = layers + "]";
371
372 ErasureCodeProfile::const_iterator parameter;
373 string rule_locality;
374 parameter = profile.find("crush-locality");
375 if (parameter != profile.end())
376 rule_locality = parameter->second;
377 string rule_failure_domain = "host";
378 parameter = profile.find("crush-failure-domain");
379 if (parameter != profile.end())
380 rule_failure_domain = parameter->second;
381
382 if (rule_locality != "") {
383 rule_steps.clear();
384 rule_steps.push_back(Step("choose", rule_locality,
385 local_group_count));
386 rule_steps.push_back(Step("chooseleaf", rule_failure_domain,
387 l + 1));
388 } else if (rule_failure_domain != "") {
389 rule_steps.clear();
390 rule_steps.push_back(Step("chooseleaf", rule_failure_domain, 0));
391 }
392
393 return err;
394 }
395
396 int ErasureCodeLrc::parse_rule(ErasureCodeProfile &profile,
397 ostream *ss)
398 {
399 int err = 0;
400 err |= to_string("crush-root", profile,
401 &rule_root,
402 "default", ss);
403 err |= to_string("crush-device-class", profile,
404 &rule_device_class,
405 "", ss);
406 if (err) {
407 return err;
408 }
409 if (profile.count("crush-steps") != 0) {
410 rule_steps.clear();
411 string str = profile.find("crush-steps")->second;
412 json_spirit::mArray description;
413 try {
414 json_spirit::mValue json;
415 json_spirit::read_or_throw(str, json);
416
417 if (json.type() != json_spirit::array_type) {
418 *ss << "crush-steps='" << str
419 << "' must be a JSON array but is of type "
420 << json.type() << " instead" << std::endl;
421 return ERROR_LRC_ARRAY;
422 }
423 description = json.get_array();
424 } catch (json_spirit::Error_position &e) {
425 *ss << "failed to parse crush-steps='" << str << "'"
426 << " at line " << e.line_ << ", column " << e.column_
427 << " : " << e.reason_ << std::endl;
428 return ERROR_LRC_PARSE_JSON;
429 }
430
431 int position = 0;
432 for (vector<json_spirit::mValue>::iterator i = description.begin();
433 i != description.end();
434 ++i, position++) {
435 if (i->type() != json_spirit::array_type) {
436 stringstream json_string;
437 json_spirit::write(*i, json_string);
438 *ss << "element of the array "
439 << str << " must be a JSON array but "
440 << json_string.str() << " at position " << position
441 << " is of type " << i->type() << " instead" << std::endl;
442 return ERROR_LRC_ARRAY;
443 }
444 int r = parse_rule_step(str, i->get_array(), ss);
445 if (r)
446 return r;
447 }
448 }
449 return 0;
450 }
451
452 int ErasureCodeLrc::parse_rule_step(const string &description_string,
453 json_spirit::mArray description,
454 ostream *ss)
455 {
456 stringstream json_string;
457 json_spirit::write(description, json_string);
458 string op;
459 string type;
460 int n = 0;
461 int position = 0;
462 for (vector<json_spirit::mValue>::iterator i = description.begin();
463 i != description.end();
464 ++i, position++) {
465 if ((position == 0 || position == 1) &&
466 i->type() != json_spirit::str_type) {
467 *ss << "element " << position << " of the array "
468 << json_string.str() << " found in " << description_string
469 << " must be a JSON string but is of type "
470 << i->type() << " instead" << std::endl;
471 return position == 0 ? ERROR_LRC_RULE_OP : ERROR_LRC_RULE_TYPE;
472 }
473 if (position == 2 && i->type() != json_spirit::int_type) {
474 *ss << "element " << position << " of the array "
475 << json_string.str() << " found in " << description_string
476 << " must be a JSON int but is of type "
477 << i->type() << " instead" << std::endl;
478 return ERROR_LRC_RULE_N;
479 }
480
481 if (position == 0)
482 op = i->get_str();
483 if (position == 1)
484 type = i->get_str();
485 if (position == 2)
486 n = i->get_int();
487 }
488 rule_steps.push_back(Step(op, type, n));
489 return 0;
490 }
491
492 int ErasureCodeLrc::init(ErasureCodeProfile &profile,
493 ostream *ss)
494 {
495 int r;
496
497 r = parse_kml(profile, ss);
498 if (r)
499 return r;
500
501 r = parse(profile, ss);
502 if (r)
503 return r;
504
505 json_spirit::mArray description;
506 r = layers_description(profile, &description, ss);
507 if (r)
508 return r;
509
510 string description_string = profile.find("layers")->second;
511
512 dout(10) << "init(" << description_string << ")" << dendl;
513
514 r = layers_parse(description_string, description, ss);
515 if (r)
516 return r;
517
518 r = layers_init(ss);
519 if (r)
520 return r;
521
522 if (profile.count("mapping") == 0) {
523 *ss << "the 'mapping' profile is missing from " << profile;
524 return ERROR_LRC_MAPPING;
525 }
526 string mapping = profile.find("mapping")->second;
527 data_chunk_count = count(begin(mapping), end(mapping), 'D');
528 chunk_count = mapping.length();
529
530 r = layers_sanity_checks(description_string, ss);
531 if (r)
532 return r;
533
534 //
535 // When initialized with kml, the profile parameters
536 // that were generated should not be stored because
537 // they would otherwise be exposed to the caller.
538 //
539 if (profile.find("l") != profile.end() &&
540 profile.find("l")->second != DEFAULT_KML) {
541 profile.erase("mapping");
542 profile.erase("layers");
543 }
544 ErasureCode::init(profile, ss);
545 return 0;
546 }
547
548 set<int> ErasureCodeLrc::get_erasures(const set<int> &want,
549 const set<int> &available) const
550 {
551 set<int> result;
552 set_difference(want.begin(), want.end(),
553 available.begin(), available.end(),
554 inserter(result, result.end()));
555 return result;
556 }
557
558 unsigned int ErasureCodeLrc::get_chunk_size(unsigned int object_size) const
559 {
560 return layers.front().erasure_code->get_chunk_size(object_size);
561 }
562
563 void p(const set<int> &s) { cerr << s; } // for gdb
564
565 int ErasureCodeLrc::_minimum_to_decode(const set<int> &want_to_read,
566 const set<int> &available_chunks,
567 set<int> *minimum)
568 {
569 dout(20) << __func__ << " want_to_read " << want_to_read
570 << " available_chunks " << available_chunks << dendl;
571 {
572 set<int> erasures_total;
573 set<int> erasures_not_recovered;
574 set<int> erasures_want;
575 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
576 if (available_chunks.count(i) == 0) {
577 erasures_total.insert(i);
578 erasures_not_recovered.insert(i);
579 if (want_to_read.count(i) != 0)
580 erasures_want.insert(i);
581 }
582 }
583
584 //
585 // Case 1:
586 //
587 // When no chunk is missing there is no need to read more than what
588 // is wanted.
589 //
590 if (erasures_want.empty()) {
591 *minimum = want_to_read;
592 dout(20) << __func__ << " minimum == want_to_read == "
593 << want_to_read << dendl;
594 return 0;
595 }
596
597 //
598 // Case 2:
599 //
600 // Try to recover erasures with as few chunks as possible.
601 //
602 for (vector<Layer>::reverse_iterator i = layers.rbegin();
603 i != layers.rend();
604 ++i) {
605 //
606 // If this layer has no chunk that we want, skip it.
607 //
608 set<int> layer_want;
609 set_intersection(want_to_read.begin(), want_to_read.end(),
610 i->chunks_as_set.begin(), i->chunks_as_set.end(),
611 inserter(layer_want, layer_want.end()));
612 if (layer_want.empty())
613 continue;
614 //
615 // Are some of the chunks we want missing ?
616 //
617 set<int> layer_erasures;
618 set_intersection(layer_want.begin(), layer_want.end(),
619 erasures_want.begin(), erasures_want.end(),
620 inserter(layer_erasures, layer_erasures.end()));
621 set<int> layer_minimum;
622 if (layer_erasures.empty()) {
623 //
624 // The chunks we want are available, this is the minimum we need
625 // to read.
626 //
627 layer_minimum = layer_want;
628 } else {
629 set<int> erasures;
630 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
631 erasures_not_recovered.begin(), erasures_not_recovered.end(),
632 inserter(erasures, erasures.end()));
633
634 if (erasures.size() > i->erasure_code->get_coding_chunk_count()) {
635 //
636 // There are too many erasures for this layer to recover: skip
637 // it and hope that an upper layer will be do better.
638 //
639 continue;
640 } else {
641 //
642 // Get all available chunks in that layer to recover the
643 // missing one(s).
644 //
645 set_difference(i->chunks_as_set.begin(), i->chunks_as_set.end(),
646 erasures_not_recovered.begin(), erasures_not_recovered.end(),
647 inserter(layer_minimum, layer_minimum.end()));
648 //
649 // Chunks recovered by this layer are removed from the list of
650 // erasures so that upper levels do not attempt to recover
651 // them.
652 //
653 for (set<int>::const_iterator j = erasures.begin();
654 j != erasures.end();
655 ++j) {
656 erasures_not_recovered.erase(*j);
657 erasures_want.erase(*j);
658 }
659 }
660 }
661 minimum->insert(layer_minimum.begin(), layer_minimum.end());
662 }
663 if (erasures_want.empty()) {
664 minimum->insert(want_to_read.begin(), want_to_read.end());
665 for (set<int>::const_iterator i = erasures_total.begin();
666 i != erasures_total.end();
667 ++i) {
668 if (minimum->count(*i))
669 minimum->erase(*i);
670 }
671 dout(20) << __func__ << " minimum = " << *minimum << dendl;
672 return 0;
673 }
674 }
675
676 {
677 //
678 // Case 3:
679 //
680 // The previous strategy failed to recover from all erasures.
681 //
682 // Try to recover as many chunks as possible, even from layers
683 // that do not contain chunks that we want, in the hope that it
684 // will help the upper layers.
685 //
686 set<int> erasures_total;
687 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
688 if (available_chunks.count(i) == 0)
689 erasures_total.insert(i);
690 }
691
692 for (vector<Layer>::reverse_iterator i = layers.rbegin();
693 i != layers.rend();
694 ++i) {
695 set<int> layer_erasures;
696 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
697 erasures_total.begin(), erasures_total.end(),
698 inserter(layer_erasures, layer_erasures.end()));
699 //
700 // If this layer has no erasure, skip it
701 //
702 if (layer_erasures.empty())
703 continue;
704
705 if (layer_erasures.size() > 0 &&
706 layer_erasures.size() <= i->erasure_code->get_coding_chunk_count()) {
707 //
708 // chunks recovered by this layer are removed from the list of
709 // erasures so that upper levels know they can rely on their
710 // availability
711 //
712 for (set<int>::const_iterator j = layer_erasures.begin();
713 j != layer_erasures.end();
714 ++j) {
715 erasures_total.erase(*j);
716 }
717 }
718 }
719 if (erasures_total.empty()) {
720 //
721 // Do not try to be smart about what chunks are necessary to
722 // recover, use all available chunks.
723 //
724 *minimum = available_chunks;
725 dout(20) << __func__ << " minimum == available_chunks == "
726 << available_chunks << dendl;
727 return 0;
728 }
729 }
730
731 derr << __func__ << " not enough chunks in " << available_chunks
732 << " to read " << want_to_read << dendl;
733 return -EIO;
734 }
735
736 int ErasureCodeLrc::encode_chunks(const set<int> &want_to_encode,
737 map<int, bufferlist> *encoded)
738 {
739 unsigned int top = layers.size();
740 for (vector<Layer>::reverse_iterator i = layers.rbegin();
741 i != layers.rend();
742 ++i) {
743 --top;
744 if (includes(i->chunks_as_set.begin(), i->chunks_as_set.end(),
745 want_to_encode.begin(), want_to_encode.end()))
746 break;
747 }
748
749 for (unsigned int i = top; i < layers.size(); ++i) {
750 const Layer &layer = layers[i];
751 set<int> layer_want_to_encode;
752 map<int, bufferlist> layer_encoded;
753 int j = 0;
754 for (const auto& c : layer.chunks) {
755 std::swap(layer_encoded[j], (*encoded)[c]);
756 if (want_to_encode.find(c) != want_to_encode.end())
757 layer_want_to_encode.insert(j);
758 j++;
759 }
760 int err = layer.erasure_code->encode_chunks(layer_want_to_encode,
761 &layer_encoded);
762 j = 0;
763 for (const auto& c : layer.chunks) {
764 std::swap(layer_encoded[j++], (*encoded)[c]);
765 }
766 if (err) {
767 derr << __func__ << " layer " << layer.chunks_map
768 << " failed with " << err << " trying to encode "
769 << layer_want_to_encode << dendl;
770 return err;
771 }
772 }
773 return 0;
774 }
775
776 int ErasureCodeLrc::decode_chunks(const set<int> &want_to_read,
777 const map<int, bufferlist> &chunks,
778 map<int, bufferlist> *decoded)
779 {
780 set<int> available_chunks;
781 set<int> erasures;
782 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
783 if (chunks.count(i) != 0)
784 available_chunks.insert(i);
785 else
786 erasures.insert(i);
787 }
788
789 set<int> want_to_read_erasures;
790
791 for (vector<Layer>::reverse_iterator layer = layers.rbegin();
792 layer != layers.rend();
793 ++layer) {
794 set<int> layer_erasures;
795 set_intersection(layer->chunks_as_set.begin(), layer->chunks_as_set.end(),
796 erasures.begin(), erasures.end(),
797 inserter(layer_erasures, layer_erasures.end()));
798
799 if (layer_erasures.size() >
800 layer->erasure_code->get_coding_chunk_count()) {
801 // skip because there are too many erasures for this layer to recover
802 } else if(layer_erasures.size() == 0) {
803 // skip because all chunks are already available
804 } else {
805 set<int> layer_want_to_read;
806 map<int, bufferlist> layer_chunks;
807 map<int, bufferlist> layer_decoded;
808 int j = 0;
809 for (vector<int>::const_iterator c = layer->chunks.begin();
810 c != layer->chunks.end();
811 ++c) {
812 //
813 // Pick chunks from *decoded* instead of *chunks* to re-use
814 // chunks recovered by previous layers. In other words
815 // *chunks* does not change but *decoded* gradually improves
816 // as more layers recover from erasures.
817 //
818 if (erasures.count(*c) == 0)
819 layer_chunks[j] = (*decoded)[*c];
820 if (want_to_read.count(*c) != 0)
821 layer_want_to_read.insert(j);
822 layer_decoded[j] = (*decoded)[*c];
823 ++j;
824 }
825 int err = layer->erasure_code->decode_chunks(layer_want_to_read,
826 layer_chunks,
827 &layer_decoded);
828 if (err) {
829 derr << __func__ << " layer " << layer->chunks_map
830 << " failed with " << err << " trying to decode "
831 << layer_want_to_read << " with " << available_chunks << dendl;
832 return err;
833 }
834 j = 0;
835 for (vector<int>::const_iterator c = layer->chunks.begin();
836 c != layer->chunks.end();
837 ++c) {
838 (*decoded)[*c] = layer_decoded[j];
839 ++j;
840 erasures.erase(*c);
841 }
842 want_to_read_erasures.clear();
843 set_intersection(erasures.begin(), erasures.end(),
844 want_to_read.begin(), want_to_read.end(),
845 inserter(want_to_read_erasures, want_to_read_erasures.end()));
846 if (want_to_read_erasures.size() == 0)
847 break;
848 }
849 }
850
851 if (want_to_read_erasures.size() > 0) {
852 derr << __func__ << " want to read " << want_to_read
853 << " with available_chunks = " << available_chunks
854 << " end up being unable to read " << want_to_read_erasures << dendl;
855 return -EIO;
856 } else {
857 return 0;
858 }
859 }