]> git.proxmox.com Git - ceph.git/blame - ceph/src/erasure-code/lrc/ErasureCodeLrc.cc
update sources to v12.1.1
[ceph.git] / ceph / src / erasure-code / lrc / ErasureCodeLrc.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 */
17
18#include <errno.h>
19#include <algorithm>
20
21#include "include/str_map.h"
22#include "common/debug.h"
23#include "crush/CrushWrapper.h"
24#include "osd/osd_types.h"
25#include "include/stringify.h"
26#include "erasure-code/ErasureCodePlugin.h"
27#include "json_spirit/json_spirit_writer.h"
28
29#include "ErasureCodeLrc.h"
30
31// re-include our assert to clobber boost's
32#include "include/assert.h"
33
34#define dout_context g_ceph_context
35#define dout_subsys ceph_subsys_osd
36#undef dout_prefix
37#define dout_prefix _prefix(_dout)
38
31f18b77
FG
39using namespace std;
40
7c673cae
FG
41static ostream& _prefix(std::ostream* _dout)
42{
43 return *_dout << "ErasureCodeLrc: ";
44}
45
224ce89b 46int ErasureCodeLrc::create_rule(const string &name,
7c673cae
FG
47 CrushWrapper &crush,
48 ostream *ss) const
49{
50 if (crush.rule_exists(name)) {
51 *ss << "rule " << name << " exists";
52 return -EEXIST;
53 }
224ce89b
WB
54 if (!crush.name_exists(rule_root)) {
55 *ss << "root item " << rule_root << " does not exist";
7c673cae
FG
56 return -ENOENT;
57 }
224ce89b
WB
58 int root = crush.get_item_id(rule_root);
59 if (rule_device_class.size()) {
60 if (!crush.class_exists(rule_device_class)) {
61 *ss << "device class " << rule_device_class << " does not exist";
62 return -ENOENT;
63 }
64 int c = crush.get_class_id(rule_device_class);
65 if (crush.class_bucket.count(root) == 0 ||
66 crush.class_bucket[root].count(c) == 0) {
67 *ss << "root item " << rule_root << " has no devices with class "
68 << rule_device_class;
69 return -EINVAL;
70 }
71 root = crush.class_bucket[root][c];
72 }
7c673cae 73
224ce89b 74 int rule = 0;
7c673cae
FG
75 int rno = 0;
76 for (rno = 0; rno < crush.get_max_rules(); rno++) {
77 if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
78 break;
79 }
224ce89b 80 rule = rno;
7c673cae 81
224ce89b 82 int steps = 4 + rule_steps.size();
7c673cae
FG
83 int min_rep = 3;
84 int max_rep = get_chunk_count();
85 int ret;
224ce89b 86 ret = crush.add_rule(steps, rule, pg_pool_t::TYPE_ERASURE,
7c673cae
FG
87 min_rep, max_rep, rno);
88 assert(ret == rno);
89 int step = 0;
90
91 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
92 assert(ret == 0);
93 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
94 assert(ret == 0);
95 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_TAKE, root, 0);
96 assert(ret == 0);
97 // [ [ "choose", "rack", 2 ],
98 // [ "chooseleaf", "host", 5 ] ]
224ce89b
WB
99 for (vector<Step>::const_iterator i = rule_steps.begin();
100 i != rule_steps.end();
7c673cae
FG
101 ++i) {
102 int op = i->op == "chooseleaf" ?
103 CRUSH_RULE_CHOOSELEAF_INDEP : CRUSH_RULE_CHOOSE_INDEP;
104 int type = crush.get_type_id(i->type);
105 if (type < 0) {
106 *ss << "unknown crush type " << i->type;
107 return -EINVAL;
108 }
109 ret = crush.set_rule_step(rno, step++, op, i->n, type);
110 assert(ret == 0);
111 }
112 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_EMIT, 0, 0);
113 assert(ret == 0);
114 crush.set_rule_name(rno, name);
224ce89b 115 return rule;
7c673cae
FG
116}
117
118int ErasureCodeLrc::layers_description(const ErasureCodeProfile &profile,
119 json_spirit::mArray *description,
120 ostream *ss) const
121{
122 if (profile.count("layers") == 0) {
123 *ss << "could not find 'layers' in " << profile << std::endl;
124 return ERROR_LRC_DESCRIPTION;
125 }
126 string str = profile.find("layers")->second;
127 try {
128 json_spirit::mValue json;
129 json_spirit::read_or_throw(str, json);
130
131 if (json.type() != json_spirit::array_type) {
132 *ss << "layers='" << str
133 << "' must be a JSON array but is of type "
134 << json.type() << " instead" << std::endl;
135 return ERROR_LRC_ARRAY;
136 }
137 *description = json.get_array();
138 } catch (json_spirit::Error_position &e) {
139 *ss << "failed to parse layers='" << str << "'"
140 << " at line " << e.line_ << ", column " << e.column_
141 << " : " << e.reason_ << std::endl;
142 return ERROR_LRC_PARSE_JSON;
143 }
144 return 0;
145}
146
147int ErasureCodeLrc::layers_parse(string description_string,
148 json_spirit::mArray description,
149 ostream *ss)
150{
151 int position = 0;
152 for (vector<json_spirit::mValue>::iterator i = description.begin();
153 i != description.end();
154 ++i, position++) {
155 if (i->type() != json_spirit::array_type) {
156 stringstream json_string;
157 json_spirit::write(*i, json_string);
158 *ss << "each element of the array "
159 << description_string << " must be a JSON array but "
160 << json_string.str() << " at position " << position
161 << " is of type " << i->type() << " instead" << std::endl;
162 return ERROR_LRC_ARRAY;
163 }
164 json_spirit::mArray layer_json = i->get_array();
165 ErasureCodeProfile profile;
166 int index = 0;
167 for (vector<json_spirit::mValue>::iterator j = layer_json.begin();
168 j != layer_json.end();
169 ++j, ++index) {
170 if (index == 0) {
171 if (j->type() != json_spirit::str_type) {
172 stringstream element;
173 json_spirit::write(*j, element);
174 *ss << "the first element of the entry "
175 << element.str() << " (first is zero) "
176 << position << " in " << description_string
177 << " is of type " << (*j).type() << " instead of string" << std::endl;
178 return ERROR_LRC_STR;
179 }
180 layers.push_back(Layer(j->get_str()));
181 Layer &layer = layers.back();
182 layer.chunks_map = j->get_str();
183 } else if(index == 1) {
184 Layer &layer = layers.back();
185 if (j->type() != json_spirit::str_type &&
186 j->type() != json_spirit::obj_type) {
187 stringstream element;
188 json_spirit::write(*j, element);
189 *ss << "the second element of the entry "
190 << element.str() << " (first is zero) "
191 << position << " in " << description_string
192 << " is of type " << (*j).type() << " instead of string or object"
193 << std::endl;
194 return ERROR_LRC_CONFIG_OPTIONS;
195 }
196 if (j->type() == json_spirit::str_type) {
197 int err = get_json_str_map(j->get_str(), *ss, &layer.profile);
198 if (err)
199 return err;
200 } else if (j->type() == json_spirit::obj_type) {
201 json_spirit::mObject o = j->get_obj();
202
203 for (map<string, json_spirit::mValue>::iterator i = o.begin();
204 i != o.end();
205 ++i) {
206 layer.profile[i->first] = i->second.get_str();
207 }
208 }
209 } else {
210 // ignore trailing elements
211 }
212 }
213 }
214 return 0;
215}
216
217int ErasureCodeLrc::layers_init(ostream *ss)
218{
219 ErasureCodePluginRegistry &registry = ErasureCodePluginRegistry::instance();
220 for (unsigned int i = 0; i < layers.size(); i++) {
221 Layer &layer = layers[i];
222 int position = 0;
223 for(std::string::iterator it = layer.chunks_map.begin();
224 it != layer.chunks_map.end();
225 ++it) {
226 if (*it == 'D')
227 layer.data.push_back(position);
228 if (*it == 'c')
229 layer.coding.push_back(position);
230 if (*it == 'c' || *it == 'D')
231 layer.chunks_as_set.insert(position);
232 position++;
233 }
234 layer.chunks = layer.data;
235 layer.chunks.insert(layer.chunks.end(),
236 layer.coding.begin(), layer.coding.end());
237 if (layer.profile.find("k") == layer.profile.end())
238 layer.profile["k"] = stringify(layer.data.size());
239 if (layer.profile.find("m") == layer.profile.end())
240 layer.profile["m"] = stringify(layer.coding.size());
241 if (layer.profile.find("plugin") == layer.profile.end())
242 layer.profile["plugin"] = "jerasure";
243 if (layer.profile.find("technique") == layer.profile.end())
244 layer.profile["technique"] = "reed_sol_van";
245 int err = registry.factory(layer.profile["plugin"],
246 directory,
247 layer.profile,
248 &layer.erasure_code,
249 ss);
250 if (err)
251 return err;
252 }
253 return 0;
254}
255
256int ErasureCodeLrc::layers_sanity_checks(string description_string,
257 ostream *ss) const
258{
259 int position = 0;
260
261 if (layers.size() < 1) {
262 *ss << "layers parameter has " << layers.size()
263 << " which is less than the minimum of one. "
264 << description_string << std::endl;
265 return ERROR_LRC_LAYERS_COUNT;
266 }
267 for (vector<Layer>::const_iterator layer = layers.begin();
268 layer != layers.end();
269 ++layer) {
270 if (chunk_count != layer->chunks_map.length()) {
271 *ss << "the first element of the array at position "
272 << position << " (starting from zero) "
273 << " is the string '" << layer->chunks_map
274 << " found in the layers parameter "
275 << description_string << ". It is expected to be "
276 << chunk_count << " characters long but is "
277 << layer->chunks_map.length() << " characters long instead "
278 << std::endl;
279 return ERROR_LRC_MAPPING_SIZE;
280 }
281 }
282 return 0;
283}
284
285int ErasureCodeLrc::parse(ErasureCodeProfile &profile,
286 ostream *ss)
287{
288 int r = ErasureCode::parse(profile, ss);
289 if (r)
290 return r;
291
224ce89b 292 return parse_rule(profile, ss);
7c673cae
FG
293}
294
295const string ErasureCodeLrc::DEFAULT_KML("-1");
296
297int ErasureCodeLrc::parse_kml(ErasureCodeProfile &profile,
298 ostream *ss)
299{
300 int err = ErasureCode::parse(profile, ss);
301 const int DEFAULT_INT = -1;
302 int k, m, l;
303 err |= to_int("k", profile, &k, DEFAULT_KML, ss);
304 err |= to_int("m", profile, &m, DEFAULT_KML, ss);
305 err |= to_int("l", profile, &l, DEFAULT_KML, ss);
306
307 if (k == DEFAULT_INT && m == DEFAULT_INT && l == DEFAULT_INT)
308 return err;
309
310 if ((k != DEFAULT_INT || m != DEFAULT_INT || l != DEFAULT_INT) &&
311 (k == DEFAULT_INT || m == DEFAULT_INT || l == DEFAULT_INT)) {
312 *ss << "All of k, m, l must be set or none of them in "
313 << profile << std::endl;
314 return ERROR_LRC_ALL_OR_NOTHING;
315 }
316
317 const char *generated[] = { "mapping",
318 "layers",
224ce89b 319 "crush-steps" };
7c673cae
FG
320
321 for (int i = 0; i < 3; i++) {
322 if (profile.count(generated[i])) {
323 *ss << "The " << generated[i] << " parameter cannot be set "
324 << "when k, m, l are set in " << profile << std::endl;
325 return ERROR_LRC_GENERATED;
326 }
327 }
328
329 if ((k + m) % l) {
330 *ss << "k + m must be a multiple of l in "
331 << profile << std::endl;
332 return ERROR_LRC_K_M_MODULO;
333 }
334
335 int local_group_count = (k + m) / l;
336
337 if (k % local_group_count) {
338 *ss << "k must be a multiple of (k + m) / l in "
339 << profile << std::endl;
340 return ERROR_LRC_K_MODULO;
341 }
342
343 if (m % local_group_count) {
344 *ss << "m must be a multiple of (k + m) / l in "
345 << profile << std::endl;
346 return ERROR_LRC_M_MODULO;
347 }
348
349 string mapping;
350 for (int i = 0; i < local_group_count; i++) {
351 mapping += string(k / local_group_count, 'D') +
352 string(m / local_group_count, '_') + "_";
353 }
354 profile["mapping"] = mapping;
355
356 string layers = "[ ";
357
358 // global layer
359 layers += " [ \"";
360 for (int i = 0; i < local_group_count; i++) {
361 layers += string(k / local_group_count, 'D') +
362 string(m / local_group_count, 'c') + "_";
363 }
364 layers += "\", \"\" ],";
365
366 // local layers
367 for (int i = 0; i < local_group_count; i++) {
368 layers += " [ \"";
369 for (int j = 0; j < local_group_count; j++) {
370 if (i == j)
371 layers += string(l, 'D') + "c";
372 else
373 layers += string(l + 1, '_');
374 }
375 layers += "\", \"\" ],";
376 }
377 profile["layers"] = layers + "]";
378
379 ErasureCodeProfile::const_iterator parameter;
224ce89b
WB
380 string rule_locality;
381 parameter = profile.find("crush-locality");
7c673cae 382 if (parameter != profile.end())
224ce89b
WB
383 rule_locality = parameter->second;
384 string rule_failure_domain = "host";
385 parameter = profile.find("crush-failure-domain");
7c673cae 386 if (parameter != profile.end())
224ce89b 387 rule_failure_domain = parameter->second;
7c673cae 388
224ce89b
WB
389 if (rule_locality != "") {
390 rule_steps.clear();
391 rule_steps.push_back(Step("choose", rule_locality,
7c673cae 392 local_group_count));
224ce89b 393 rule_steps.push_back(Step("chooseleaf", rule_failure_domain,
7c673cae 394 l + 1));
224ce89b
WB
395 } else if (rule_failure_domain != "") {
396 rule_steps.clear();
397 rule_steps.push_back(Step("chooseleaf", rule_failure_domain, 0));
7c673cae
FG
398 }
399
400 return err;
401}
402
224ce89b 403int ErasureCodeLrc::parse_rule(ErasureCodeProfile &profile,
7c673cae
FG
404 ostream *ss)
405{
406 int err = 0;
224ce89b
WB
407 err |= to_string("crush-root", profile,
408 &rule_root,
7c673cae 409 "default", ss);
224ce89b
WB
410 err |= to_string("crush-device-class", profile,
411 &rule_device_class,
412 "", ss);
7c673cae 413
224ce89b
WB
414 if (profile.count("crush-steps") != 0) {
415 rule_steps.clear();
416 string str = profile.find("crush-steps")->second;
7c673cae
FG
417 json_spirit::mArray description;
418 try {
419 json_spirit::mValue json;
420 json_spirit::read_or_throw(str, json);
421
422 if (json.type() != json_spirit::array_type) {
224ce89b 423 *ss << "crush-steps='" << str
7c673cae
FG
424 << "' must be a JSON array but is of type "
425 << json.type() << " instead" << std::endl;
426 return ERROR_LRC_ARRAY;
427 }
428 description = json.get_array();
429 } catch (json_spirit::Error_position &e) {
224ce89b 430 *ss << "failed to parse crush-steps='" << str << "'"
7c673cae
FG
431 << " at line " << e.line_ << ", column " << e.column_
432 << " : " << e.reason_ << std::endl;
433 return ERROR_LRC_PARSE_JSON;
434 }
435
436 int position = 0;
437 for (vector<json_spirit::mValue>::iterator i = description.begin();
438 i != description.end();
439 ++i, position++) {
440 if (i->type() != json_spirit::array_type) {
441 stringstream json_string;
442 json_spirit::write(*i, json_string);
443 *ss << "element of the array "
444 << str << " must be a JSON array but "
445 << json_string.str() << " at position " << position
446 << " is of type " << i->type() << " instead" << std::endl;
447 return ERROR_LRC_ARRAY;
448 }
224ce89b 449 int r = parse_rule_step(str, i->get_array(), ss);
7c673cae
FG
450 if (r)
451 return r;
452 }
453 }
454 return 0;
455}
456
224ce89b 457int ErasureCodeLrc::parse_rule_step(string description_string,
7c673cae
FG
458 json_spirit::mArray description,
459 ostream *ss)
460{
461 stringstream json_string;
462 json_spirit::write(description, json_string);
463 string op;
464 string type;
465 int n = 0;
466 int position = 0;
467 for (vector<json_spirit::mValue>::iterator i = description.begin();
468 i != description.end();
469 ++i, position++) {
470 if ((position == 0 || position == 1) &&
471 i->type() != json_spirit::str_type) {
472 *ss << "element " << position << " of the array "
473 << json_string.str() << " found in " << description_string
474 << " must be a JSON string but is of type "
475 << i->type() << " instead" << std::endl;
224ce89b 476 return position == 0 ? ERROR_LRC_RULE_OP : ERROR_LRC_RULE_TYPE;
7c673cae
FG
477 }
478 if (position == 2 && i->type() != json_spirit::int_type) {
479 *ss << "element " << position << " of the array "
480 << json_string.str() << " found in " << description_string
481 << " must be a JSON int but is of type "
482 << i->type() << " instead" << std::endl;
224ce89b 483 return ERROR_LRC_RULE_N;
7c673cae
FG
484 }
485
486 if (position == 0)
487 op = i->get_str();
488 if (position == 1)
489 type = i->get_str();
490 if (position == 2)
491 n = i->get_int();
492 }
224ce89b 493 rule_steps.push_back(Step(op, type, n));
7c673cae
FG
494 return 0;
495}
496
497int ErasureCodeLrc::init(ErasureCodeProfile &profile,
498 ostream *ss)
499{
500 int r;
501
502 r = parse_kml(profile, ss);
503 if (r)
504 return r;
505
506 r = parse(profile, ss);
507 if (r)
508 return r;
509
510 json_spirit::mArray description;
511 r = layers_description(profile, &description, ss);
512 if (r)
513 return r;
514
515 string description_string = profile.find("layers")->second;
516
517 dout(10) << "init(" << description_string << ")" << dendl;
518
519 r = layers_parse(description_string, description, ss);
520 if (r)
521 return r;
522
523 r = layers_init(ss);
524 if (r)
525 return r;
526
527 if (profile.count("mapping") == 0) {
528 *ss << "the 'mapping' profile is missing from " << profile;
529 return ERROR_LRC_MAPPING;
530 }
531 string mapping = profile.find("mapping")->second;
532 data_chunk_count = 0;
533 for(std::string::iterator it = mapping.begin(); it != mapping.end(); ++it) {
534 if (*it == 'D')
535 data_chunk_count++;
536 }
537 chunk_count = mapping.length();
538
539 r = layers_sanity_checks(description_string, ss);
540 if (r)
541 return r;
542
543 //
544 // When initialized with kml, the profile parameters
545 // that were generated should not be stored because
546 // they would otherwise be exposed to the caller.
547 //
548 if (profile.find("l") != profile.end() &&
549 profile.find("l")->second != DEFAULT_KML) {
550 profile.erase("mapping");
551 profile.erase("layers");
552 }
553 ErasureCode::init(profile, ss);
554 return 0;
555}
556
557set<int> ErasureCodeLrc::get_erasures(const set<int> &want,
558 const set<int> &available) const
559{
560 set<int> result;
561 set_difference(want.begin(), want.end(),
562 available.begin(), available.end(),
563 inserter(result, result.end()));
564 return result;
565}
566
567unsigned int ErasureCodeLrc::get_chunk_size(unsigned int object_size) const
568{
569 return layers.front().erasure_code->get_chunk_size(object_size);
570}
571
572void p(const set<int> &s) { cerr << s; } // for gdb
573
574int ErasureCodeLrc::minimum_to_decode(const set<int> &want_to_read,
575 const set<int> &available_chunks,
576 set<int> *minimum)
577{
578 dout(20) << __func__ << " want_to_read " << want_to_read
579 << " available_chunks " << available_chunks << dendl;
580 {
581 set<int> erasures_total;
582 set<int> erasures_not_recovered;
583 set<int> erasures_want;
584 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
585 if (available_chunks.count(i) == 0) {
586 erasures_total.insert(i);
587 erasures_not_recovered.insert(i);
588 if (want_to_read.count(i) != 0)
589 erasures_want.insert(i);
590 }
591 }
592
593 //
594 // Case 1:
595 //
596 // When no chunk is missing there is no need to read more than what
597 // is wanted.
598 //
599 if (erasures_want.empty()) {
600 *minimum = want_to_read;
601 dout(20) << __func__ << " minimum == want_to_read == "
602 << want_to_read << dendl;
603 return 0;
604 }
605
606 //
607 // Case 2:
608 //
609 // Try to recover erasures with as few chunks as possible.
610 //
611 for (vector<Layer>::reverse_iterator i = layers.rbegin();
612 i != layers.rend();
613 ++i) {
614 //
615 // If this layer has no chunk that we want, skip it.
616 //
617 set<int> layer_want;
618 set_intersection(want_to_read.begin(), want_to_read.end(),
619 i->chunks_as_set.begin(), i->chunks_as_set.end(),
620 inserter(layer_want, layer_want.end()));
621 if (layer_want.empty())
622 continue;
623 //
624 // Are some of the chunks we want missing ?
625 //
626 set<int> layer_erasures;
627 set_intersection(layer_want.begin(), layer_want.end(),
628 erasures_want.begin(), erasures_want.end(),
629 inserter(layer_erasures, layer_erasures.end()));
630 set<int> layer_minimum;
631 if (layer_erasures.empty()) {
632 //
633 // The chunks we want are available, this is the minimum we need
634 // to read.
635 //
636 layer_minimum = layer_want;
637 } else {
638 set<int> erasures;
639 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
640 erasures_not_recovered.begin(), erasures_not_recovered.end(),
641 inserter(erasures, erasures.end()));
642
643 if (erasures.size() > i->erasure_code->get_coding_chunk_count()) {
644 //
645 // There are too many erasures for this layer to recover: skip
646 // it and hope that an upper layer will be do better.
647 //
648 continue;
649 } else {
650 //
651 // Get all available chunks in that layer to recover the
652 // missing one(s).
653 //
654 set_difference(i->chunks_as_set.begin(), i->chunks_as_set.end(),
655 erasures_not_recovered.begin(), erasures_not_recovered.end(),
656 inserter(layer_minimum, layer_minimum.end()));
657 //
658 // Chunks recovered by this layer are removed from the list of
659 // erasures so that upper levels do not attempt to recover
660 // them.
661 //
662 for (set<int>::const_iterator j = erasures.begin();
663 j != erasures.end();
664 ++j) {
665 erasures_not_recovered.erase(*j);
666 if (erasures_want.count(*j))
667 erasures_want.erase(*j);
668 }
669 }
670 }
671 minimum->insert(layer_minimum.begin(), layer_minimum.end());
672 }
673 if (erasures_want.empty()) {
674 minimum->insert(want_to_read.begin(), want_to_read.end());
675 for (set<int>::const_iterator i = erasures_total.begin();
676 i != erasures_total.end();
677 ++i) {
678 if (minimum->count(*i))
679 minimum->erase(*i);
680 }
681 dout(20) << __func__ << " minimum = " << *minimum << dendl;
682 return 0;
683 }
684 }
685
686 {
687 //
688 // Case 3:
689 //
690 // The previous strategy failed to recover from all erasures.
691 //
692 // Try to recover as many chunks as possible, even from layers
693 // that do not contain chunks that we want, in the hope that it
694 // will help the upper layers.
695 //
696 set<int> erasures_total;
697 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
698 if (available_chunks.count(i) == 0)
699 erasures_total.insert(i);
700 }
701
702 for (vector<Layer>::reverse_iterator i = layers.rbegin();
703 i != layers.rend();
704 ++i) {
705 set<int> layer_erasures;
706 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
707 erasures_total.begin(), erasures_total.end(),
708 inserter(layer_erasures, layer_erasures.end()));
709 //
710 // If this layer has no erasure, skip it
711 //
712 if (layer_erasures.empty())
713 continue;
714
715 if (layer_erasures.size() > 0 &&
716 layer_erasures.size() <= i->erasure_code->get_coding_chunk_count()) {
717 //
718 // chunks recovered by this layer are removed from the list of
719 // erasures so that upper levels know they can rely on their
720 // availability
721 //
722 for (set<int>::const_iterator j = layer_erasures.begin();
723 j != layer_erasures.end();
724 ++j) {
725 erasures_total.erase(*j);
726 }
727 }
728 }
729 if (erasures_total.empty()) {
730 //
731 // Do not try to be smart about what chunks are necessary to
732 // recover, use all available chunks.
733 //
734 *minimum = available_chunks;
735 dout(20) << __func__ << " minimum == available_chunks == "
736 << available_chunks << dendl;
737 return 0;
738 }
739 }
740
741 derr << __func__ << " not enough chunks in " << available_chunks
742 << " to read " << want_to_read << dendl;
743 return -EIO;
744}
745
746int ErasureCodeLrc::encode_chunks(const set<int> &want_to_encode,
747 map<int, bufferlist> *encoded)
748{
749 unsigned int top = layers.size();
750 for (vector<Layer>::reverse_iterator i = layers.rbegin();
751 i != layers.rend();
752 ++i) {
753 --top;
754 if (includes(i->chunks_as_set.begin(), i->chunks_as_set.end(),
755 want_to_encode.begin(), want_to_encode.end()))
756 break;
757 }
758
759 for (unsigned int i = top; i < layers.size(); ++i) {
760 const Layer &layer = layers[i];
761 set<int> layer_want_to_encode;
762 map<int, bufferlist> layer_encoded;
763 int j = 0;
764 for (vector<int>::const_iterator c = layer.chunks.begin();
765 c != layer.chunks.end();
766 ++c) {
767 layer_encoded[j] = (*encoded)[*c];
768 if (want_to_encode.find(*c) != want_to_encode.end())
769 layer_want_to_encode.insert(j);
770 j++;
771 }
772 int err = layer.erasure_code->encode_chunks(layer_want_to_encode,
773 &layer_encoded);
774 if (err) {
775 derr << __func__ << " layer " << layer.chunks_map
776 << " failed with " << err << " trying to encode "
777 << layer_want_to_encode << dendl;
778 return err;
779 }
780 }
781 return 0;
782}
783
784int ErasureCodeLrc::decode_chunks(const set<int> &want_to_read,
785 const map<int, bufferlist> &chunks,
786 map<int, bufferlist> *decoded)
787{
788 set<int> available_chunks;
789 set<int> erasures;
790 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
791 if (chunks.count(i) != 0)
792 available_chunks.insert(i);
793 else
794 erasures.insert(i);
795 }
796
797 set<int> want_to_read_erasures;
798
799 for (vector<Layer>::reverse_iterator layer = layers.rbegin();
800 layer != layers.rend();
801 ++layer) {
802 set<int> layer_erasures;
803 set_intersection(layer->chunks_as_set.begin(), layer->chunks_as_set.end(),
804 erasures.begin(), erasures.end(),
805 inserter(layer_erasures, layer_erasures.end()));
806
807 if (layer_erasures.size() >
808 layer->erasure_code->get_coding_chunk_count()) {
809 // skip because there are too many erasures for this layer to recover
810 } else if(layer_erasures.size() == 0) {
811 // skip because all chunks are already available
812 } else {
813 set<int> layer_want_to_read;
814 map<int, bufferlist> layer_chunks;
815 map<int, bufferlist> layer_decoded;
816 int j = 0;
817 for (vector<int>::const_iterator c = layer->chunks.begin();
818 c != layer->chunks.end();
819 ++c) {
820 //
821 // Pick chunks from *decoded* instead of *chunks* to re-use
822 // chunks recovered by previous layers. In other words
823 // *chunks* does not change but *decoded* gradually improves
824 // as more layers recover from erasures.
825 //
826 if (erasures.count(*c) == 0)
827 layer_chunks[j] = (*decoded)[*c];
828 if (want_to_read.count(*c) != 0)
829 layer_want_to_read.insert(j);
830 layer_decoded[j] = (*decoded)[*c];
831 ++j;
832 }
833 int err = layer->erasure_code->decode_chunks(layer_want_to_read,
834 layer_chunks,
835 &layer_decoded);
836 if (err) {
837 derr << __func__ << " layer " << layer->chunks_map
838 << " failed with " << err << " trying to decode "
839 << layer_want_to_read << " with " << available_chunks << dendl;
840 return err;
841 }
842 j = 0;
843 for (vector<int>::const_iterator c = layer->chunks.begin();
844 c != layer->chunks.end();
845 ++c) {
846 (*decoded)[*c] = layer_decoded[j];
847 ++j;
848 if (erasures.count(*c) != 0)
849 erasures.erase(*c);
850 }
851 want_to_read_erasures.clear();
852 set_intersection(erasures.begin(), erasures.end(),
853 want_to_read.begin(), want_to_read.end(),
854 inserter(want_to_read_erasures, want_to_read_erasures.end()));
855 if (want_to_read_erasures.size() == 0)
856 break;
857 }
858 }
859
860 if (want_to_read_erasures.size() > 0) {
861 derr << __func__ << " want to read " << want_to_read
862 << " with available_chunks = " << available_chunks
863 << " end up being unable to read " << want_to_read_erasures << dendl;
864 return -EIO;
865 } else {
866 return 0;
867 }
868}