]> git.proxmox.com Git - ceph.git/blame - ceph/src/erasure-code/lrc/ErasureCodeLrc.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / erasure-code / lrc / ErasureCodeLrc.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 */
17
18#include <errno.h>
19#include <algorithm>
20
21#include "include/str_map.h"
22#include "common/debug.h"
23#include "crush/CrushWrapper.h"
24#include "osd/osd_types.h"
25#include "include/stringify.h"
26#include "erasure-code/ErasureCodePlugin.h"
27#include "json_spirit/json_spirit_writer.h"
28
29#include "ErasureCodeLrc.h"
30
31// re-include our assert to clobber boost's
11fdf7f2 32#include "include/ceph_assert.h"
7c673cae
FG
33
34#define dout_context g_ceph_context
35#define dout_subsys ceph_subsys_osd
36#undef dout_prefix
37#define dout_prefix _prefix(_dout)
38
31f18b77
FG
39using namespace std;
40
7c673cae
FG
41static ostream& _prefix(std::ostream* _dout)
42{
43 return *_dout << "ErasureCodeLrc: ";
44}
45
224ce89b 46int ErasureCodeLrc::create_rule(const string &name,
7c673cae
FG
47 CrushWrapper &crush,
48 ostream *ss) const
49{
50 if (crush.rule_exists(name)) {
51 *ss << "rule " << name << " exists";
52 return -EEXIST;
53 }
224ce89b
WB
54 if (!crush.name_exists(rule_root)) {
55 *ss << "root item " << rule_root << " does not exist";
7c673cae
FG
56 return -ENOENT;
57 }
224ce89b
WB
58 int root = crush.get_item_id(rule_root);
59 if (rule_device_class.size()) {
60 if (!crush.class_exists(rule_device_class)) {
61 *ss << "device class " << rule_device_class << " does not exist";
62 return -ENOENT;
63 }
64 int c = crush.get_class_id(rule_device_class);
65 if (crush.class_bucket.count(root) == 0 ||
66 crush.class_bucket[root].count(c) == 0) {
67 *ss << "root item " << rule_root << " has no devices with class "
68 << rule_device_class;
69 return -EINVAL;
70 }
71 root = crush.class_bucket[root][c];
72 }
7c673cae 73
7c673cae
FG
74 int rno = 0;
75 for (rno = 0; rno < crush.get_max_rules(); rno++) {
76 if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
77 break;
78 }
7c673cae 79
224ce89b 80 int steps = 4 + rule_steps.size();
7c673cae
FG
81 int min_rep = 3;
82 int max_rep = get_chunk_count();
83 int ret;
c07f9fc5
FG
84 ret = crush.add_rule(rno, steps, pg_pool_t::TYPE_ERASURE,
85 min_rep, max_rep);
11fdf7f2 86 ceph_assert(ret == rno);
7c673cae
FG
87 int step = 0;
88
89 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
11fdf7f2 90 ceph_assert(ret == 0);
7c673cae 91 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
11fdf7f2 92 ceph_assert(ret == 0);
7c673cae 93 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_TAKE, root, 0);
11fdf7f2 94 ceph_assert(ret == 0);
7c673cae
FG
95 // [ [ "choose", "rack", 2 ],
96 // [ "chooseleaf", "host", 5 ] ]
224ce89b
WB
97 for (vector<Step>::const_iterator i = rule_steps.begin();
98 i != rule_steps.end();
7c673cae
FG
99 ++i) {
100 int op = i->op == "chooseleaf" ?
101 CRUSH_RULE_CHOOSELEAF_INDEP : CRUSH_RULE_CHOOSE_INDEP;
102 int type = crush.get_type_id(i->type);
103 if (type < 0) {
104 *ss << "unknown crush type " << i->type;
105 return -EINVAL;
106 }
107 ret = crush.set_rule_step(rno, step++, op, i->n, type);
11fdf7f2 108 ceph_assert(ret == 0);
7c673cae
FG
109 }
110 ret = crush.set_rule_step(rno, step++, CRUSH_RULE_EMIT, 0, 0);
11fdf7f2 111 ceph_assert(ret == 0);
7c673cae 112 crush.set_rule_name(rno, name);
c07f9fc5 113 return rno;
7c673cae
FG
114}
115
116int ErasureCodeLrc::layers_description(const ErasureCodeProfile &profile,
117 json_spirit::mArray *description,
118 ostream *ss) const
119{
120 if (profile.count("layers") == 0) {
121 *ss << "could not find 'layers' in " << profile << std::endl;
122 return ERROR_LRC_DESCRIPTION;
123 }
124 string str = profile.find("layers")->second;
125 try {
126 json_spirit::mValue json;
127 json_spirit::read_or_throw(str, json);
128
129 if (json.type() != json_spirit::array_type) {
130 *ss << "layers='" << str
131 << "' must be a JSON array but is of type "
132 << json.type() << " instead" << std::endl;
133 return ERROR_LRC_ARRAY;
134 }
135 *description = json.get_array();
136 } catch (json_spirit::Error_position &e) {
137 *ss << "failed to parse layers='" << str << "'"
138 << " at line " << e.line_ << ", column " << e.column_
139 << " : " << e.reason_ << std::endl;
140 return ERROR_LRC_PARSE_JSON;
141 }
142 return 0;
143}
144
11fdf7f2 145int ErasureCodeLrc::layers_parse(const string &description_string,
7c673cae
FG
146 json_spirit::mArray description,
147 ostream *ss)
148{
149 int position = 0;
150 for (vector<json_spirit::mValue>::iterator i = description.begin();
151 i != description.end();
152 ++i, position++) {
153 if (i->type() != json_spirit::array_type) {
154 stringstream json_string;
155 json_spirit::write(*i, json_string);
156 *ss << "each element of the array "
157 << description_string << " must be a JSON array but "
158 << json_string.str() << " at position " << position
159 << " is of type " << i->type() << " instead" << std::endl;
160 return ERROR_LRC_ARRAY;
161 }
162 json_spirit::mArray layer_json = i->get_array();
163 ErasureCodeProfile profile;
164 int index = 0;
165 for (vector<json_spirit::mValue>::iterator j = layer_json.begin();
166 j != layer_json.end();
167 ++j, ++index) {
168 if (index == 0) {
169 if (j->type() != json_spirit::str_type) {
170 stringstream element;
171 json_spirit::write(*j, element);
172 *ss << "the first element of the entry "
173 << element.str() << " (first is zero) "
174 << position << " in " << description_string
175 << " is of type " << (*j).type() << " instead of string" << std::endl;
176 return ERROR_LRC_STR;
177 }
178 layers.push_back(Layer(j->get_str()));
179 Layer &layer = layers.back();
180 layer.chunks_map = j->get_str();
181 } else if(index == 1) {
182 Layer &layer = layers.back();
183 if (j->type() != json_spirit::str_type &&
184 j->type() != json_spirit::obj_type) {
185 stringstream element;
186 json_spirit::write(*j, element);
187 *ss << "the second element of the entry "
188 << element.str() << " (first is zero) "
189 << position << " in " << description_string
190 << " is of type " << (*j).type() << " instead of string or object"
191 << std::endl;
192 return ERROR_LRC_CONFIG_OPTIONS;
193 }
194 if (j->type() == json_spirit::str_type) {
195 int err = get_json_str_map(j->get_str(), *ss, &layer.profile);
196 if (err)
197 return err;
198 } else if (j->type() == json_spirit::obj_type) {
199 json_spirit::mObject o = j->get_obj();
200
201 for (map<string, json_spirit::mValue>::iterator i = o.begin();
202 i != o.end();
203 ++i) {
204 layer.profile[i->first] = i->second.get_str();
205 }
206 }
207 } else {
208 // ignore trailing elements
209 }
210 }
211 }
212 return 0;
213}
214
215int ErasureCodeLrc::layers_init(ostream *ss)
216{
217 ErasureCodePluginRegistry &registry = ErasureCodePluginRegistry::instance();
218 for (unsigned int i = 0; i < layers.size(); i++) {
219 Layer &layer = layers[i];
220 int position = 0;
221 for(std::string::iterator it = layer.chunks_map.begin();
222 it != layer.chunks_map.end();
223 ++it) {
224 if (*it == 'D')
225 layer.data.push_back(position);
226 if (*it == 'c')
227 layer.coding.push_back(position);
228 if (*it == 'c' || *it == 'D')
229 layer.chunks_as_set.insert(position);
230 position++;
231 }
232 layer.chunks = layer.data;
233 layer.chunks.insert(layer.chunks.end(),
234 layer.coding.begin(), layer.coding.end());
235 if (layer.profile.find("k") == layer.profile.end())
236 layer.profile["k"] = stringify(layer.data.size());
237 if (layer.profile.find("m") == layer.profile.end())
238 layer.profile["m"] = stringify(layer.coding.size());
239 if (layer.profile.find("plugin") == layer.profile.end())
240 layer.profile["plugin"] = "jerasure";
241 if (layer.profile.find("technique") == layer.profile.end())
242 layer.profile["technique"] = "reed_sol_van";
243 int err = registry.factory(layer.profile["plugin"],
244 directory,
245 layer.profile,
246 &layer.erasure_code,
247 ss);
248 if (err)
249 return err;
250 }
251 return 0;
252}
253
11fdf7f2 254int ErasureCodeLrc::layers_sanity_checks(const string &description_string,
7c673cae
FG
255 ostream *ss) const
256{
257 int position = 0;
258
259 if (layers.size() < 1) {
260 *ss << "layers parameter has " << layers.size()
261 << " which is less than the minimum of one. "
262 << description_string << std::endl;
263 return ERROR_LRC_LAYERS_COUNT;
264 }
265 for (vector<Layer>::const_iterator layer = layers.begin();
266 layer != layers.end();
267 ++layer) {
268 if (chunk_count != layer->chunks_map.length()) {
269 *ss << "the first element of the array at position "
270 << position << " (starting from zero) "
271 << " is the string '" << layer->chunks_map
272 << " found in the layers parameter "
273 << description_string << ". It is expected to be "
274 << chunk_count << " characters long but is "
275 << layer->chunks_map.length() << " characters long instead "
276 << std::endl;
277 return ERROR_LRC_MAPPING_SIZE;
278 }
279 }
280 return 0;
281}
282
283int ErasureCodeLrc::parse(ErasureCodeProfile &profile,
284 ostream *ss)
285{
286 int r = ErasureCode::parse(profile, ss);
287 if (r)
288 return r;
289
224ce89b 290 return parse_rule(profile, ss);
7c673cae
FG
291}
292
293const string ErasureCodeLrc::DEFAULT_KML("-1");
294
295int ErasureCodeLrc::parse_kml(ErasureCodeProfile &profile,
296 ostream *ss)
297{
298 int err = ErasureCode::parse(profile, ss);
299 const int DEFAULT_INT = -1;
300 int k, m, l;
301 err |= to_int("k", profile, &k, DEFAULT_KML, ss);
302 err |= to_int("m", profile, &m, DEFAULT_KML, ss);
303 err |= to_int("l", profile, &l, DEFAULT_KML, ss);
304
305 if (k == DEFAULT_INT && m == DEFAULT_INT && l == DEFAULT_INT)
306 return err;
307
308 if ((k != DEFAULT_INT || m != DEFAULT_INT || l != DEFAULT_INT) &&
309 (k == DEFAULT_INT || m == DEFAULT_INT || l == DEFAULT_INT)) {
310 *ss << "All of k, m, l must be set or none of them in "
311 << profile << std::endl;
312 return ERROR_LRC_ALL_OR_NOTHING;
313 }
314
315 const char *generated[] = { "mapping",
316 "layers",
224ce89b 317 "crush-steps" };
7c673cae
FG
318
319 for (int i = 0; i < 3; i++) {
320 if (profile.count(generated[i])) {
321 *ss << "The " << generated[i] << " parameter cannot be set "
322 << "when k, m, l are set in " << profile << std::endl;
323 return ERROR_LRC_GENERATED;
324 }
325 }
326
11fdf7f2 327 if (l == 0 || (k + m) % l) {
7c673cae
FG
328 *ss << "k + m must be a multiple of l in "
329 << profile << std::endl;
330 return ERROR_LRC_K_M_MODULO;
331 }
332
333 int local_group_count = (k + m) / l;
334
335 if (k % local_group_count) {
336 *ss << "k must be a multiple of (k + m) / l in "
337 << profile << std::endl;
338 return ERROR_LRC_K_MODULO;
339 }
340
341 if (m % local_group_count) {
342 *ss << "m must be a multiple of (k + m) / l in "
343 << profile << std::endl;
344 return ERROR_LRC_M_MODULO;
345 }
346
347 string mapping;
348 for (int i = 0; i < local_group_count; i++) {
349 mapping += string(k / local_group_count, 'D') +
350 string(m / local_group_count, '_') + "_";
351 }
352 profile["mapping"] = mapping;
353
354 string layers = "[ ";
355
356 // global layer
357 layers += " [ \"";
358 for (int i = 0; i < local_group_count; i++) {
359 layers += string(k / local_group_count, 'D') +
360 string(m / local_group_count, 'c') + "_";
361 }
362 layers += "\", \"\" ],";
363
364 // local layers
365 for (int i = 0; i < local_group_count; i++) {
366 layers += " [ \"";
367 for (int j = 0; j < local_group_count; j++) {
368 if (i == j)
369 layers += string(l, 'D') + "c";
370 else
371 layers += string(l + 1, '_');
372 }
373 layers += "\", \"\" ],";
374 }
375 profile["layers"] = layers + "]";
376
377 ErasureCodeProfile::const_iterator parameter;
224ce89b
WB
378 string rule_locality;
379 parameter = profile.find("crush-locality");
7c673cae 380 if (parameter != profile.end())
224ce89b
WB
381 rule_locality = parameter->second;
382 string rule_failure_domain = "host";
383 parameter = profile.find("crush-failure-domain");
7c673cae 384 if (parameter != profile.end())
224ce89b 385 rule_failure_domain = parameter->second;
7c673cae 386
224ce89b
WB
387 if (rule_locality != "") {
388 rule_steps.clear();
389 rule_steps.push_back(Step("choose", rule_locality,
7c673cae 390 local_group_count));
224ce89b 391 rule_steps.push_back(Step("chooseleaf", rule_failure_domain,
7c673cae 392 l + 1));
224ce89b
WB
393 } else if (rule_failure_domain != "") {
394 rule_steps.clear();
395 rule_steps.push_back(Step("chooseleaf", rule_failure_domain, 0));
7c673cae
FG
396 }
397
398 return err;
399}
400
224ce89b 401int ErasureCodeLrc::parse_rule(ErasureCodeProfile &profile,
7c673cae
FG
402 ostream *ss)
403{
404 int err = 0;
224ce89b
WB
405 err |= to_string("crush-root", profile,
406 &rule_root,
7c673cae 407 "default", ss);
224ce89b
WB
408 err |= to_string("crush-device-class", profile,
409 &rule_device_class,
410 "", ss);
7c673cae 411
224ce89b
WB
412 if (profile.count("crush-steps") != 0) {
413 rule_steps.clear();
414 string str = profile.find("crush-steps")->second;
7c673cae
FG
415 json_spirit::mArray description;
416 try {
417 json_spirit::mValue json;
418 json_spirit::read_or_throw(str, json);
419
420 if (json.type() != json_spirit::array_type) {
224ce89b 421 *ss << "crush-steps='" << str
7c673cae
FG
422 << "' must be a JSON array but is of type "
423 << json.type() << " instead" << std::endl;
424 return ERROR_LRC_ARRAY;
425 }
426 description = json.get_array();
427 } catch (json_spirit::Error_position &e) {
224ce89b 428 *ss << "failed to parse crush-steps='" << str << "'"
7c673cae
FG
429 << " at line " << e.line_ << ", column " << e.column_
430 << " : " << e.reason_ << std::endl;
431 return ERROR_LRC_PARSE_JSON;
432 }
433
434 int position = 0;
435 for (vector<json_spirit::mValue>::iterator i = description.begin();
436 i != description.end();
437 ++i, position++) {
438 if (i->type() != json_spirit::array_type) {
439 stringstream json_string;
440 json_spirit::write(*i, json_string);
441 *ss << "element of the array "
442 << str << " must be a JSON array but "
443 << json_string.str() << " at position " << position
444 << " is of type " << i->type() << " instead" << std::endl;
445 return ERROR_LRC_ARRAY;
446 }
224ce89b 447 int r = parse_rule_step(str, i->get_array(), ss);
7c673cae
FG
448 if (r)
449 return r;
450 }
451 }
452 return 0;
453}
454
11fdf7f2 455int ErasureCodeLrc::parse_rule_step(const string &description_string,
7c673cae
FG
456 json_spirit::mArray description,
457 ostream *ss)
458{
459 stringstream json_string;
460 json_spirit::write(description, json_string);
461 string op;
462 string type;
463 int n = 0;
464 int position = 0;
465 for (vector<json_spirit::mValue>::iterator i = description.begin();
466 i != description.end();
467 ++i, position++) {
468 if ((position == 0 || position == 1) &&
469 i->type() != json_spirit::str_type) {
470 *ss << "element " << position << " of the array "
471 << json_string.str() << " found in " << description_string
472 << " must be a JSON string but is of type "
473 << i->type() << " instead" << std::endl;
224ce89b 474 return position == 0 ? ERROR_LRC_RULE_OP : ERROR_LRC_RULE_TYPE;
7c673cae
FG
475 }
476 if (position == 2 && i->type() != json_spirit::int_type) {
477 *ss << "element " << position << " of the array "
478 << json_string.str() << " found in " << description_string
479 << " must be a JSON int but is of type "
480 << i->type() << " instead" << std::endl;
224ce89b 481 return ERROR_LRC_RULE_N;
7c673cae
FG
482 }
483
484 if (position == 0)
485 op = i->get_str();
486 if (position == 1)
487 type = i->get_str();
488 if (position == 2)
489 n = i->get_int();
490 }
224ce89b 491 rule_steps.push_back(Step(op, type, n));
7c673cae
FG
492 return 0;
493}
494
495int ErasureCodeLrc::init(ErasureCodeProfile &profile,
496 ostream *ss)
497{
498 int r;
499
500 r = parse_kml(profile, ss);
501 if (r)
502 return r;
503
504 r = parse(profile, ss);
505 if (r)
506 return r;
507
508 json_spirit::mArray description;
509 r = layers_description(profile, &description, ss);
510 if (r)
511 return r;
512
513 string description_string = profile.find("layers")->second;
514
515 dout(10) << "init(" << description_string << ")" << dendl;
516
517 r = layers_parse(description_string, description, ss);
518 if (r)
519 return r;
520
521 r = layers_init(ss);
522 if (r)
523 return r;
524
525 if (profile.count("mapping") == 0) {
526 *ss << "the 'mapping' profile is missing from " << profile;
527 return ERROR_LRC_MAPPING;
528 }
529 string mapping = profile.find("mapping")->second;
11fdf7f2 530 data_chunk_count = count(begin(mapping), end(mapping), 'D');
7c673cae
FG
531 chunk_count = mapping.length();
532
533 r = layers_sanity_checks(description_string, ss);
534 if (r)
535 return r;
536
537 //
538 // When initialized with kml, the profile parameters
539 // that were generated should not be stored because
540 // they would otherwise be exposed to the caller.
541 //
542 if (profile.find("l") != profile.end() &&
543 profile.find("l")->second != DEFAULT_KML) {
544 profile.erase("mapping");
545 profile.erase("layers");
546 }
547 ErasureCode::init(profile, ss);
548 return 0;
549}
550
551set<int> ErasureCodeLrc::get_erasures(const set<int> &want,
552 const set<int> &available) const
553{
554 set<int> result;
555 set_difference(want.begin(), want.end(),
556 available.begin(), available.end(),
557 inserter(result, result.end()));
558 return result;
559}
560
561unsigned int ErasureCodeLrc::get_chunk_size(unsigned int object_size) const
562{
563 return layers.front().erasure_code->get_chunk_size(object_size);
564}
565
566void p(const set<int> &s) { cerr << s; } // for gdb
567
11fdf7f2
TL
568int ErasureCodeLrc::_minimum_to_decode(const set<int> &want_to_read,
569 const set<int> &available_chunks,
570 set<int> *minimum)
7c673cae
FG
571{
572 dout(20) << __func__ << " want_to_read " << want_to_read
573 << " available_chunks " << available_chunks << dendl;
574 {
575 set<int> erasures_total;
576 set<int> erasures_not_recovered;
577 set<int> erasures_want;
578 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
579 if (available_chunks.count(i) == 0) {
580 erasures_total.insert(i);
581 erasures_not_recovered.insert(i);
582 if (want_to_read.count(i) != 0)
583 erasures_want.insert(i);
584 }
585 }
586
587 //
588 // Case 1:
589 //
590 // When no chunk is missing there is no need to read more than what
591 // is wanted.
592 //
593 if (erasures_want.empty()) {
594 *minimum = want_to_read;
595 dout(20) << __func__ << " minimum == want_to_read == "
596 << want_to_read << dendl;
597 return 0;
598 }
599
600 //
601 // Case 2:
602 //
603 // Try to recover erasures with as few chunks as possible.
604 //
605 for (vector<Layer>::reverse_iterator i = layers.rbegin();
606 i != layers.rend();
607 ++i) {
608 //
609 // If this layer has no chunk that we want, skip it.
610 //
611 set<int> layer_want;
612 set_intersection(want_to_read.begin(), want_to_read.end(),
613 i->chunks_as_set.begin(), i->chunks_as_set.end(),
614 inserter(layer_want, layer_want.end()));
615 if (layer_want.empty())
616 continue;
617 //
618 // Are some of the chunks we want missing ?
619 //
620 set<int> layer_erasures;
621 set_intersection(layer_want.begin(), layer_want.end(),
622 erasures_want.begin(), erasures_want.end(),
623 inserter(layer_erasures, layer_erasures.end()));
624 set<int> layer_minimum;
625 if (layer_erasures.empty()) {
626 //
627 // The chunks we want are available, this is the minimum we need
628 // to read.
629 //
630 layer_minimum = layer_want;
631 } else {
632 set<int> erasures;
633 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
634 erasures_not_recovered.begin(), erasures_not_recovered.end(),
635 inserter(erasures, erasures.end()));
636
637 if (erasures.size() > i->erasure_code->get_coding_chunk_count()) {
638 //
639 // There are too many erasures for this layer to recover: skip
640 // it and hope that an upper layer will be do better.
641 //
642 continue;
643 } else {
644 //
645 // Get all available chunks in that layer to recover the
646 // missing one(s).
647 //
648 set_difference(i->chunks_as_set.begin(), i->chunks_as_set.end(),
649 erasures_not_recovered.begin(), erasures_not_recovered.end(),
650 inserter(layer_minimum, layer_minimum.end()));
651 //
652 // Chunks recovered by this layer are removed from the list of
653 // erasures so that upper levels do not attempt to recover
654 // them.
655 //
656 for (set<int>::const_iterator j = erasures.begin();
657 j != erasures.end();
658 ++j) {
659 erasures_not_recovered.erase(*j);
11fdf7f2 660 erasures_want.erase(*j);
7c673cae
FG
661 }
662 }
663 }
664 minimum->insert(layer_minimum.begin(), layer_minimum.end());
665 }
666 if (erasures_want.empty()) {
667 minimum->insert(want_to_read.begin(), want_to_read.end());
668 for (set<int>::const_iterator i = erasures_total.begin();
669 i != erasures_total.end();
670 ++i) {
671 if (minimum->count(*i))
672 minimum->erase(*i);
673 }
674 dout(20) << __func__ << " minimum = " << *minimum << dendl;
675 return 0;
676 }
677 }
678
679 {
680 //
681 // Case 3:
682 //
683 // The previous strategy failed to recover from all erasures.
684 //
685 // Try to recover as many chunks as possible, even from layers
686 // that do not contain chunks that we want, in the hope that it
687 // will help the upper layers.
688 //
689 set<int> erasures_total;
690 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
691 if (available_chunks.count(i) == 0)
692 erasures_total.insert(i);
693 }
694
695 for (vector<Layer>::reverse_iterator i = layers.rbegin();
696 i != layers.rend();
697 ++i) {
698 set<int> layer_erasures;
699 set_intersection(i->chunks_as_set.begin(), i->chunks_as_set.end(),
700 erasures_total.begin(), erasures_total.end(),
701 inserter(layer_erasures, layer_erasures.end()));
702 //
703 // If this layer has no erasure, skip it
704 //
705 if (layer_erasures.empty())
706 continue;
707
708 if (layer_erasures.size() > 0 &&
709 layer_erasures.size() <= i->erasure_code->get_coding_chunk_count()) {
710 //
711 // chunks recovered by this layer are removed from the list of
712 // erasures so that upper levels know they can rely on their
713 // availability
714 //
715 for (set<int>::const_iterator j = layer_erasures.begin();
716 j != layer_erasures.end();
717 ++j) {
718 erasures_total.erase(*j);
719 }
720 }
721 }
722 if (erasures_total.empty()) {
723 //
724 // Do not try to be smart about what chunks are necessary to
725 // recover, use all available chunks.
726 //
727 *minimum = available_chunks;
728 dout(20) << __func__ << " minimum == available_chunks == "
729 << available_chunks << dendl;
730 return 0;
731 }
732 }
733
734 derr << __func__ << " not enough chunks in " << available_chunks
735 << " to read " << want_to_read << dendl;
736 return -EIO;
737}
738
739int ErasureCodeLrc::encode_chunks(const set<int> &want_to_encode,
740 map<int, bufferlist> *encoded)
741{
742 unsigned int top = layers.size();
743 for (vector<Layer>::reverse_iterator i = layers.rbegin();
744 i != layers.rend();
745 ++i) {
746 --top;
747 if (includes(i->chunks_as_set.begin(), i->chunks_as_set.end(),
748 want_to_encode.begin(), want_to_encode.end()))
749 break;
750 }
751
752 for (unsigned int i = top; i < layers.size(); ++i) {
753 const Layer &layer = layers[i];
754 set<int> layer_want_to_encode;
755 map<int, bufferlist> layer_encoded;
756 int j = 0;
11fdf7f2
TL
757 for (const auto& c : layer.chunks) {
758 std::swap(layer_encoded[j], (*encoded)[c]);
759 if (want_to_encode.find(c) != want_to_encode.end())
7c673cae
FG
760 layer_want_to_encode.insert(j);
761 j++;
762 }
763 int err = layer.erasure_code->encode_chunks(layer_want_to_encode,
764 &layer_encoded);
11fdf7f2
TL
765 j = 0;
766 for (const auto& c : layer.chunks) {
767 std::swap(layer_encoded[j++], (*encoded)[c]);
768 }
7c673cae
FG
769 if (err) {
770 derr << __func__ << " layer " << layer.chunks_map
771 << " failed with " << err << " trying to encode "
772 << layer_want_to_encode << dendl;
773 return err;
774 }
775 }
776 return 0;
777}
778
779int ErasureCodeLrc::decode_chunks(const set<int> &want_to_read,
780 const map<int, bufferlist> &chunks,
781 map<int, bufferlist> *decoded)
782{
783 set<int> available_chunks;
784 set<int> erasures;
785 for (unsigned int i = 0; i < get_chunk_count(); ++i) {
786 if (chunks.count(i) != 0)
787 available_chunks.insert(i);
788 else
789 erasures.insert(i);
790 }
791
792 set<int> want_to_read_erasures;
793
794 for (vector<Layer>::reverse_iterator layer = layers.rbegin();
795 layer != layers.rend();
796 ++layer) {
797 set<int> layer_erasures;
798 set_intersection(layer->chunks_as_set.begin(), layer->chunks_as_set.end(),
799 erasures.begin(), erasures.end(),
800 inserter(layer_erasures, layer_erasures.end()));
801
802 if (layer_erasures.size() >
803 layer->erasure_code->get_coding_chunk_count()) {
804 // skip because there are too many erasures for this layer to recover
805 } else if(layer_erasures.size() == 0) {
806 // skip because all chunks are already available
807 } else {
808 set<int> layer_want_to_read;
809 map<int, bufferlist> layer_chunks;
810 map<int, bufferlist> layer_decoded;
811 int j = 0;
812 for (vector<int>::const_iterator c = layer->chunks.begin();
813 c != layer->chunks.end();
814 ++c) {
815 //
816 // Pick chunks from *decoded* instead of *chunks* to re-use
817 // chunks recovered by previous layers. In other words
818 // *chunks* does not change but *decoded* gradually improves
819 // as more layers recover from erasures.
820 //
821 if (erasures.count(*c) == 0)
822 layer_chunks[j] = (*decoded)[*c];
823 if (want_to_read.count(*c) != 0)
824 layer_want_to_read.insert(j);
825 layer_decoded[j] = (*decoded)[*c];
826 ++j;
827 }
828 int err = layer->erasure_code->decode_chunks(layer_want_to_read,
829 layer_chunks,
830 &layer_decoded);
831 if (err) {
832 derr << __func__ << " layer " << layer->chunks_map
833 << " failed with " << err << " trying to decode "
834 << layer_want_to_read << " with " << available_chunks << dendl;
835 return err;
836 }
837 j = 0;
838 for (vector<int>::const_iterator c = layer->chunks.begin();
839 c != layer->chunks.end();
840 ++c) {
841 (*decoded)[*c] = layer_decoded[j];
842 ++j;
11fdf7f2 843 erasures.erase(*c);
7c673cae
FG
844 }
845 want_to_read_erasures.clear();
846 set_intersection(erasures.begin(), erasures.end(),
847 want_to_read.begin(), want_to_read.end(),
848 inserter(want_to_read_erasures, want_to_read_erasures.end()));
849 if (want_to_read_erasures.size() == 0)
850 break;
851 }
852 }
853
854 if (want_to_read_erasures.size() > 0) {
855 derr << __func__ << " want to read " << want_to_read
856 << " with available_chunks = " << available_chunks
857 << " end up being unable to read " << want_to_read_erasures << dendl;
858 return -EIO;
859 } else {
860 return 0;
861 }
862}