1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2018 Indian Institute of Science <office.ece@iisc.ac.in>
8 * Author: Myna Vajha <mynaramana@gmail.com>
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
20 #include "ErasureCodeClay.h"
22 #include "common/debug.h"
23 #include "erasure-code/ErasureCodePlugin.h"
24 #include "include/ceph_assert.h"
25 #include "include/str_map.h"
26 #include "include/stringify.h"
27 #include "osd/osd_types.h"
30 #define dout_context g_ceph_context
31 #define dout_subsys ceph_subsys_osd
33 #define dout_prefix _prefix(_dout)
35 #define LARGEST_VECTOR_WORDSIZE 16
36 #define talloc(type, num) (type *) malloc(sizeof(type)*(num))
// Log-line prefix hook used by the dout/dendl macros defined above:
// every debug message emitted from this file is tagged with the
// plugin name.
static std::ostream& _prefix(std::ostream* _dout)
{
  return *_dout << "ErasureCodeClay: ";
}
// Integer exponentiation by squaring: returns a^x for x >= 0.
// (The extraction lost the loop body; restored here.)  Used to size
// the q-ary sub-chunk address space, e.g. sub_chunk_no = q^t.
static int pow_int(int a, int x) {
  int power = 1;

  while (x) {
    if (x & 1) power *= a;
    x /= 2;
    a *= a;
  }
  return power;
}
55 ErasureCodeClay::~ErasureCodeClay()
57 for (int i
= 0; i
< q
*t
; i
++) {
58 if (U_buf
[i
].length() != 0) U_buf
[i
].clear();
62 int ErasureCodeClay::init(ErasureCodeProfile
&profile
,
66 r
= parse(profile
, ss
);
70 r
= ErasureCode::init(profile
, ss
);
73 ErasureCodePluginRegistry
®istry
= ErasureCodePluginRegistry::instance();
74 r
= registry
.factory(mds
.profile
["plugin"],
81 r
= registry
.factory(pft
.profile
["plugin"],
90 unsigned int ErasureCodeClay::get_chunk_size(unsigned int object_size
) const
92 unsigned int alignment_scalar_code
= pft
.erasure_code
->get_chunk_size(1);
93 unsigned int alignment
= sub_chunk_no
* k
* alignment_scalar_code
;
95 return round_up_to(object_size
, alignment
) / k
;
98 int ErasureCodeClay::minimum_to_decode(const set
<int> &want_to_read
,
99 const set
<int> &available
,
100 map
<int, vector
<pair
<int, int>>> *minimum
)
102 if (is_repair(want_to_read
, available
)) {
103 return minimum_to_repair(want_to_read
, available
, minimum
);
105 return ErasureCode::minimum_to_decode(want_to_read
, available
, minimum
);
109 int ErasureCodeClay::decode(const set
<int> &want_to_read
,
110 const map
<int, bufferlist
> &chunks
,
111 map
<int, bufferlist
> *decoded
, int chunk_size
)
114 for ([[maybe_unused
]] auto& [node
, bl
] : chunks
) {
116 (void)bl
; // silence -Wunused-variable
119 if (is_repair(want_to_read
, avail
) &&
120 ((unsigned int)chunk_size
> chunks
.begin()->second
.length())) {
121 return repair(want_to_read
, chunks
, decoded
, chunk_size
);
123 return ErasureCode::_decode(want_to_read
, chunks
, decoded
);
127 void p(const set
<int> &s
) { cerr
<< s
; } // for gdb
129 int ErasureCodeClay::encode_chunks(const set
<int> &want_to_encode
,
130 map
<int, bufferlist
> *encoded
)
132 map
<int, bufferlist
> chunks
;
133 set
<int> parity_chunks
;
134 int chunk_size
= (*encoded
)[0].length();
136 for (int i
= 0; i
< k
+ m
; i
++) {
138 chunks
[i
] = (*encoded
)[i
];
140 chunks
[i
+nu
] = (*encoded
)[i
];
141 parity_chunks
.insert(i
+nu
);
145 for (int i
= k
; i
< k
+ nu
; i
++) {
146 bufferptr
buf(buffer::create_aligned(chunk_size
, SIMD_ALIGN
));
148 chunks
[i
].push_back(std::move(buf
));
151 int res
= decode_layered(parity_chunks
, &chunks
);
152 for (int i
= k
; i
< k
+ nu
; i
++) {
153 // need to clean some of the intermediate chunks here!!
159 int ErasureCodeClay::decode_chunks(const set
<int> &want_to_read
,
160 const map
<int, bufferlist
> &chunks
,
161 map
<int, bufferlist
> *decoded
)
164 map
<int, bufferlist
> coded_chunks
;
166 for (int i
= 0; i
< k
+ m
; i
++) {
167 if (chunks
.count(i
) == 0) {
168 erasures
.insert(i
< k
? i
: i
+nu
);
170 ceph_assert(decoded
->count(i
) > 0);
171 coded_chunks
[i
< k
? i
: i
+nu
] = (*decoded
)[i
];
173 int chunk_size
= coded_chunks
[0].length();
175 for (int i
= k
; i
< k
+nu
; i
++) {
176 bufferptr
buf(buffer::create_aligned(chunk_size
, SIMD_ALIGN
));
178 coded_chunks
[i
].push_back(std::move(buf
));
181 int res
= decode_layered(erasures
, &coded_chunks
);
182 for (int i
= k
; i
< k
+nu
; i
++) {
183 coded_chunks
[i
].clear();
188 int ErasureCodeClay::parse(ErasureCodeProfile
&profile
,
192 err
= ErasureCode::parse(profile
, ss
);
193 err
|= to_int("k", profile
, &k
, DEFAULT_K
, ss
);
194 err
|= to_int("m", profile
, &m
, DEFAULT_M
, ss
);
196 err
|= sanity_check_k_m(k
, m
, ss
);
198 err
|= to_int("d", profile
, &d
, std::to_string(k
+m
-1), ss
);
200 // check for scalar_mds in profile input
201 if (profile
.find("scalar_mds") == profile
.end() ||
202 profile
.find("scalar_mds")->second
.empty()) {
203 mds
.profile
["plugin"] = "jerasure";
204 pft
.profile
["plugin"] = "jerasure";
206 std::string p
= profile
.find("scalar_mds")->second
;
207 if ((p
== "jerasure") || (p
== "isa") || (p
== "shec")) {
208 mds
.profile
["plugin"] = p
;
209 pft
.profile
["plugin"] = p
;
211 *ss
<< "scalar_mds " << mds
.profile
["plugin"] <<
212 "is not currently supported, use one of 'jerasure',"<<
213 " 'isa', 'shec'" << std::endl
;
219 if (profile
.find("technique") == profile
.end() ||
220 profile
.find("technique")->second
.empty()) {
221 if ((mds
.profile
["plugin"]=="jerasure") || (mds
.profile
["plugin"]=="isa") ) {
222 mds
.profile
["technique"] = "reed_sol_van";
223 pft
.profile
["technique"] = "reed_sol_van";
225 mds
.profile
["technique"] = "single";
226 pft
.profile
["technique"] = "single";
229 std::string p
= profile
.find("technique")->second
;
230 if (mds
.profile
["plugin"] == "jerasure") {
231 if ( (p
== "reed_sol_van") || (p
== "reed_sol_r6_op") || (p
== "cauchy_orig")
232 || (p
== "cauchy_good") || (p
== "liber8tion")) {
233 mds
.profile
["technique"] = p
;
234 pft
.profile
["technique"] = p
;
236 *ss
<< "technique " << p
<< "is not currently supported, use one of "
237 << "reed_sol_van', 'reed_sol_r6_op','cauchy_orig',"
238 << "'cauchy_good','liber8tion'"<< std::endl
;
242 } else if (mds
.profile
["plugin"] == "isa") {
243 if ( (p
== "reed_sol_van") || (p
== "cauchy")) {
244 mds
.profile
["technique"] = p
;
245 pft
.profile
["technique"] = p
;
247 *ss
<< "technique " << p
<< "is not currently supported, use one of"
248 << "'reed_sol_van','cauchy'"<< std::endl
;
253 if ( (p
== "single") || (p
== "multiple")) {
254 mds
.profile
["technique"] = p
;
255 pft
.profile
["technique"] = p
;
257 *ss
<< "technique " << p
<< "is not currently supported, use one of"<<
258 "'single','multiple'"<< std::endl
;
264 if ((d
< k
) || (d
> k
+ m
- 1)) {
265 *ss
<< "value of d " << d
266 << " must be within [ " << k
<< "," << k
+m
-1 << "]" << std::endl
;
273 nu
= q
- (k
+ m
) % q
;
283 if (mds
.profile
["plugin"] == "shec") {
284 mds
.profile
["c"] = '2';
285 pft
.profile
["c"] = '2';
287 mds
.profile
["k"] = std::to_string(k
+nu
);
288 mds
.profile
["m"] = std::to_string(m
);
289 mds
.profile
["w"] = '8';
291 pft
.profile
["k"] = '2';
292 pft
.profile
["m"] = '2';
293 pft
.profile
["w"] = '8';
295 t
= (k
+ m
+ nu
) / q
;
296 sub_chunk_no
= pow_int(q
, t
);
299 << " (q,t,nu)=(" << q
<< "," << t
<< "," << nu
<<")" << dendl
;
304 int ErasureCodeClay::is_repair(const set
<int> &want_to_read
,
305 const set
<int> &available_chunks
) {
307 if (includes(available_chunks
.begin(), available_chunks
.end(),
308 want_to_read
.begin(), want_to_read
.end())) return 0;
309 if (want_to_read
.size() > 1) return 0;
311 int i
= *want_to_read
.begin();
312 int lost_node_id
= (i
< k
) ? i
: i
+nu
;
313 for (int x
= 0; x
< q
; x
++) {
314 int node
= (lost_node_id
/q
)*q
+x
;
315 node
= (node
< k
) ? node
: node
-nu
;
316 if (node
!= i
) { // node in the same group other than erased node
317 if (available_chunks
.count(node
) == 0) return 0;
321 if (available_chunks
.size() < (unsigned)d
) return 0;
325 int ErasureCodeClay::minimum_to_repair(const set
<int> &want_to_read
,
326 const set
<int> &available_chunks
,
327 map
<int, vector
<pair
<int, int>>> *minimum
)
329 int i
= *want_to_read
.begin();
330 int lost_node_index
= (i
< k
) ? i
: i
+nu
;
331 int rep_node_index
= 0;
333 // add all the nodes in lost node's y column.
334 vector
<pair
<int, int>> sub_chunk_ind
;
335 get_repair_subchunks(lost_node_index
, sub_chunk_ind
);
336 if ((available_chunks
.size() >= (unsigned)d
)) {
337 for (int j
= 0; j
< q
; j
++) {
338 if (j
!= lost_node_index
%q
) {
339 rep_node_index
= (lost_node_index
/q
)*q
+j
;
340 if (rep_node_index
< k
) {
341 minimum
->insert(make_pair(rep_node_index
, sub_chunk_ind
));
342 } else if (rep_node_index
>= k
+nu
) {
343 minimum
->insert(make_pair(rep_node_index
-nu
, sub_chunk_ind
));
347 for (auto chunk
: available_chunks
) {
348 if (minimum
->size() >= (unsigned)d
) {
351 if (!minimum
->count(chunk
)) {
352 minimum
->emplace(chunk
, sub_chunk_ind
);
356 dout(0) << "minimum_to_repair: shouldn't have come here" << dendl
;
359 ceph_assert(minimum
->size() == (unsigned)d
);
363 void ErasureCodeClay::get_repair_subchunks(const int &lost_node
,
364 vector
<pair
<int, int>> &repair_sub_chunks_ind
)
366 const int y_lost
= lost_node
/ q
;
367 const int x_lost
= lost_node
% q
;
369 const int seq_sc_count
= pow_int(q
, t
-1-y_lost
);
370 const int num_seq
= pow_int(q
, y_lost
);
372 int index
= x_lost
* seq_sc_count
;
373 for (int ind_seq
= 0; ind_seq
< num_seq
; ind_seq
++) {
374 repair_sub_chunks_ind
.push_back(make_pair(index
, seq_sc_count
));
375 index
+= q
* seq_sc_count
;
379 int ErasureCodeClay::get_repair_sub_chunk_count(const set
<int> &want_to_read
)
381 int weight_vector
[t
];
382 std::fill(weight_vector
, weight_vector
+ t
, 0);
383 for (auto to_read
: want_to_read
) {
384 weight_vector
[to_read
/ q
]++;
387 int repair_subchunks_count
= 1;
388 for (int y
= 0; y
< t
; y
++) {
389 repair_subchunks_count
= repair_subchunks_count
*(q
-weight_vector
[y
]);
392 return sub_chunk_no
- repair_subchunks_count
;
395 int ErasureCodeClay::repair(const set
<int> &want_to_read
,
396 const map
<int, bufferlist
> &chunks
,
397 map
<int, bufferlist
> *repaired
, int chunk_size
)
400 ceph_assert((want_to_read
.size() == 1) && (chunks
.size() == (unsigned)d
));
402 int repair_sub_chunk_no
= get_repair_sub_chunk_count(want_to_read
);
403 vector
<pair
<int, int>> repair_sub_chunks_ind
;
405 unsigned repair_blocksize
= chunks
.begin()->second
.length();
406 assert(repair_blocksize
%repair_sub_chunk_no
== 0);
408 unsigned sub_chunksize
= repair_blocksize
/repair_sub_chunk_no
;
409 unsigned chunksize
= sub_chunk_no
*sub_chunksize
;
411 ceph_assert(chunksize
== (unsigned)chunk_size
);
413 map
<int, bufferlist
> recovered_data
;
414 map
<int, bufferlist
> helper_data
;
415 set
<int> aloof_nodes
;
417 for (int i
= 0; i
< k
+ m
; i
++) {
418 // included helper data only for d+nu nodes.
419 if (auto found
= chunks
.find(i
); found
!= chunks
.end()) { // i is a helper
421 helper_data
[i
] = found
->second
;
423 helper_data
[i
+nu
] = found
->second
;
426 if (i
!= *want_to_read
.begin()) { // aloof node case.
427 int aloof_node_id
= (i
< k
) ? i
: i
+nu
;
428 aloof_nodes
.insert(aloof_node_id
);
430 bufferptr
ptr(buffer::create_aligned(chunksize
, SIMD_ALIGN
));
432 int lost_node_id
= (i
< k
) ? i
: i
+nu
;
433 (*repaired
)[i
].push_back(ptr
);
434 recovered_data
[lost_node_id
] = (*repaired
)[i
];
435 get_repair_subchunks(lost_node_id
, repair_sub_chunks_ind
);
440 // this is for shortened codes i.e., when nu > 0
441 for (int i
=k
; i
< k
+nu
; i
++) {
442 bufferptr
ptr(buffer::create_aligned(repair_blocksize
, SIMD_ALIGN
));
444 helper_data
[i
].push_back(ptr
);
447 ceph_assert(helper_data
.size()+aloof_nodes
.size()+recovered_data
.size() ==
450 int r
= repair_one_lost_chunk(recovered_data
, aloof_nodes
,
451 helper_data
, repair_blocksize
,
452 repair_sub_chunks_ind
);
454 // clear buffers created for the purpose of shortening
455 for (int i
= k
; i
< k
+nu
; i
++) {
456 helper_data
[i
].clear();
462 int ErasureCodeClay::repair_one_lost_chunk(map
<int, bufferlist
> &recovered_data
,
463 set
<int> &aloof_nodes
,
464 map
<int, bufferlist
> &helper_data
,
465 int repair_blocksize
,
466 vector
<pair
<int,int>> &repair_sub_chunks_ind
)
468 unsigned repair_subchunks
= (unsigned)sub_chunk_no
/ q
;
469 unsigned sub_chunksize
= repair_blocksize
/ repair_subchunks
;
472 map
<int, set
<int> > ordered_planes
;
473 map
<int, int> repair_plane_to_ind
;
474 int count_retrieved_sub_chunks
= 0;
477 bufferptr
buf(buffer::create_aligned(sub_chunksize
, SIMD_ALIGN
));
479 temp_buf
.push_back(buf
);
481 for (auto [index
,count
] : repair_sub_chunks_ind
) {
482 for (int j
= index
; j
< index
+ count
; j
++) {
483 get_plane_vector(j
, z_vec
);
485 // check across all erasures and aloof nodes
486 for ([[maybe_unused
]] auto& [node
, bl
] : recovered_data
) {
487 if (node
% q
== z_vec
[node
/ q
]) order
++;
488 (void)bl
; // silence -Wunused-variable
490 for (auto node
: aloof_nodes
) {
491 if (node
% q
== z_vec
[node
/ q
]) order
++;
493 ceph_assert(order
> 0);
494 ordered_planes
[order
].insert(j
);
495 // to keep track of a sub chunk within helper buffer recieved
496 repair_plane_to_ind
[j
] = plane_ind
;
500 assert((unsigned)plane_ind
== repair_subchunks
);
503 for (int i
= 0; i
< q
*t
; i
++) {
504 if (U_buf
[i
].length() == 0) {
505 bufferptr
buf(buffer::create_aligned(sub_chunk_no
*sub_chunksize
, SIMD_ALIGN
));
507 U_buf
[i
].push_back(std::move(buf
));
513 for ([[maybe_unused
]] auto& [node
, bl
] : recovered_data
) {
516 (void)bl
; // silence -Wunused-variable
518 ceph_assert(count
== 1);
521 for (int i
= 0; i
< q
; i
++) {
522 erasures
.insert(lost_chunk
- lost_chunk
% q
+ i
);
524 for (auto node
: aloof_nodes
) {
525 erasures
.insert(node
);
528 for (int order
= 1; ;order
++) {
529 if (ordered_planes
.count(order
) == 0) {
532 plane_count
+= ordered_planes
[order
].size();
533 for (auto z
: ordered_planes
[order
]) {
534 get_plane_vector(z
, z_vec
);
536 for (int y
= 0; y
< t
; y
++) {
537 for (int x
= 0; x
< q
; x
++) {
538 int node_xy
= y
*q
+ x
;
539 map
<int, bufferlist
> known_subchunks
;
540 map
<int, bufferlist
> pftsubchunks
;
541 set
<int> pft_erasures
;
542 if (erasures
.count(node_xy
) == 0) {
543 assert(helper_data
.count(node_xy
) > 0);
544 int z_sw
= z
+ (x
- z_vec
[y
])*pow_int(q
,t
-1-y
);
545 int node_sw
= y
*q
+ z_vec
[y
];
546 int i0
= 0, i1
= 1, i2
= 2, i3
= 3;
553 if (aloof_nodes
.count(node_sw
) > 0) {
554 assert(repair_plane_to_ind
.count(z
) > 0);
555 assert(repair_plane_to_ind
.count(z_sw
) > 0);
556 pft_erasures
.insert(i2
);
557 known_subchunks
[i0
].substr_of(helper_data
[node_xy
], repair_plane_to_ind
[z
]*sub_chunksize
, sub_chunksize
);
558 known_subchunks
[i3
].substr_of(U_buf
[node_sw
], z_sw
*sub_chunksize
, sub_chunksize
);
559 pftsubchunks
[i0
] = known_subchunks
[i0
];
560 pftsubchunks
[i1
] = temp_buf
;
561 pftsubchunks
[i2
].substr_of(U_buf
[node_xy
], z
*sub_chunksize
, sub_chunksize
);
562 pftsubchunks
[i3
] = known_subchunks
[i3
];
563 for (int i
=0; i
<3; i
++) {
564 pftsubchunks
[i
].rebuild_aligned(SIMD_ALIGN
);
566 pft
.erasure_code
->decode_chunks(pft_erasures
, known_subchunks
, &pftsubchunks
);
568 ceph_assert(helper_data
.count(node_sw
) > 0);
569 ceph_assert(repair_plane_to_ind
.count(z
) > 0);
571 pft_erasures
.insert(i2
);
572 ceph_assert(repair_plane_to_ind
.count(z_sw
) > 0);
573 known_subchunks
[i0
].substr_of(helper_data
[node_xy
], repair_plane_to_ind
[z
]*sub_chunksize
, sub_chunksize
);
574 known_subchunks
[i1
].substr_of(helper_data
[node_sw
], repair_plane_to_ind
[z_sw
]*sub_chunksize
, sub_chunksize
);
575 pftsubchunks
[i0
] = known_subchunks
[i0
];
576 pftsubchunks
[i1
] = known_subchunks
[i1
];
577 pftsubchunks
[i2
].substr_of(U_buf
[node_xy
], z
*sub_chunksize
, sub_chunksize
);
578 pftsubchunks
[i3
].substr_of(temp_buf
, 0, sub_chunksize
);
579 for (int i
=0; i
<3; i
++) {
580 pftsubchunks
[i
].rebuild_aligned(SIMD_ALIGN
);
582 pft
.erasure_code
->decode_chunks(pft_erasures
, known_subchunks
, &pftsubchunks
);
584 char* uncoupled_chunk
= U_buf
[node_xy
].c_str();
585 char* coupled_chunk
= helper_data
[node_xy
].c_str();
586 memcpy(&uncoupled_chunk
[z
*sub_chunksize
],
587 &coupled_chunk
[repair_plane_to_ind
[z
]*sub_chunksize
],
594 ceph_assert(erasures
.size() <= (unsigned)m
);
595 decode_uncoupled(erasures
, z
, sub_chunksize
);
597 for (auto i
: erasures
) {
600 int node_sw
= y
*q
+z_vec
[y
];
601 int z_sw
= z
+ (x
- z_vec
[y
]) * pow_int(q
,t
-1-y
);
602 set
<int> pft_erasures
;
603 map
<int, bufferlist
> known_subchunks
;
604 map
<int, bufferlist
> pftsubchunks
;
605 int i0
= 0, i1
= 1, i2
= 2, i3
= 3;
612 // make sure it is not an aloof node before you retrieve repaired_data
613 if (aloof_nodes
.count(i
) == 0) {
614 if (x
== z_vec
[y
]) { // hole-dot pair (type 0)
615 char* coupled_chunk
= recovered_data
[i
].c_str();
616 char* uncoupled_chunk
= U_buf
[i
].c_str();
617 memcpy(&coupled_chunk
[z
*sub_chunksize
],
618 &uncoupled_chunk
[z
*sub_chunksize
],
620 count_retrieved_sub_chunks
++;
622 ceph_assert(y
== lost_chunk
/ q
);
623 ceph_assert(node_sw
== lost_chunk
);
624 ceph_assert(helper_data
.count(i
) > 0);
625 pft_erasures
.insert(i1
);
626 known_subchunks
[i0
].substr_of(helper_data
[i
], repair_plane_to_ind
[z
]*sub_chunksize
, sub_chunksize
);
627 known_subchunks
[i2
].substr_of(U_buf
[i
], z
*sub_chunksize
, sub_chunksize
);
629 pftsubchunks
[i0
] = known_subchunks
[i0
];
630 pftsubchunks
[i1
].substr_of(recovered_data
[node_sw
], z_sw
*sub_chunksize
, sub_chunksize
);
631 pftsubchunks
[i2
] = known_subchunks
[i2
];
632 pftsubchunks
[i3
] = temp_buf
;
633 for (int i
=0; i
<3; i
++) {
634 pftsubchunks
[i
].rebuild_aligned(SIMD_ALIGN
);
636 pft
.erasure_code
->decode_chunks(pft_erasures
, known_subchunks
, &pftsubchunks
);
639 } // recover all erasures
640 } // planes of particular order
647 int ErasureCodeClay::decode_layered(set
<int> &erased_chunks
,
648 map
<int, bufferlist
> *chunks
)
650 int num_erasures
= erased_chunks
.size();
652 int size
= (*chunks
)[0].length();
653 ceph_assert(size
%sub_chunk_no
== 0);
654 int sc_size
= size
/ sub_chunk_no
;
656 ceph_assert(num_erasures
> 0);
658 for (int i
= k
+nu
; (num_erasures
< m
) && (i
< q
*t
); i
++) {
659 if ([[maybe_unused
]] auto [it
, added
] = erased_chunks
.emplace(i
); added
) {
661 (void)it
; // silence -Wunused-variable
664 ceph_assert(num_erasures
== m
);
666 int max_iscore
= get_max_iscore(erased_chunks
);
667 int order
[sub_chunk_no
];
669 for (int i
= 0; i
< q
*t
; i
++) {
670 if (U_buf
[i
].length() == 0) {
671 bufferptr
buf(buffer::create_aligned(size
, SIMD_ALIGN
));
673 U_buf
[i
].push_back(std::move(buf
));
677 set_planes_sequential_decoding_order(order
, erased_chunks
);
679 for (int iscore
= 0; iscore
<= max_iscore
; iscore
++) {
680 for (int z
= 0; z
< sub_chunk_no
; z
++) {
681 if (order
[z
] == iscore
) {
682 decode_erasures(erased_chunks
, z
, chunks
, sc_size
);
686 for (int z
= 0; z
< sub_chunk_no
; z
++) {
687 if (order
[z
] == iscore
) {
688 get_plane_vector(z
, z_vec
);
689 for (auto node_xy
: erased_chunks
) {
692 int node_sw
= y
*q
+z_vec
[y
];
694 if (erased_chunks
.count(node_sw
) == 0) {
695 recover_type1_erasure(chunks
, x
, y
, z
, z_vec
, sc_size
);
696 } else if (z_vec
[y
] < x
){
697 ceph_assert(erased_chunks
.count(node_sw
) > 0);
698 ceph_assert(z_vec
[y
] != x
);
699 get_coupled_from_uncoupled(chunks
, x
, y
, z
, z_vec
, sc_size
);
702 char* C
= (*chunks
)[node_xy
].c_str();
703 char* U
= U_buf
[node_xy
].c_str();
704 memcpy(&C
[z
*sc_size
], &U
[z
*sc_size
], sc_size
);
714 int ErasureCodeClay::decode_erasures(const set
<int>& erased_chunks
, int z
,
715 map
<int, bufferlist
>* chunks
, int sc_size
)
719 get_plane_vector(z
, z_vec
);
721 for (int x
= 0; x
< q
; x
++) {
722 for (int y
= 0; y
< t
; y
++) {
724 int node_sw
= q
*y
+z_vec
[y
];
725 if (erased_chunks
.count(node_xy
) == 0) {
727 get_uncoupled_from_coupled(chunks
, x
, y
, z
, z_vec
, sc_size
);
728 } else if (z_vec
[y
] == x
) {
729 char* uncoupled_chunk
= U_buf
[node_xy
].c_str();
730 char* coupled_chunk
= (*chunks
)[node_xy
].c_str();
731 memcpy(&uncoupled_chunk
[z
*sc_size
], &coupled_chunk
[z
*sc_size
], sc_size
);
733 if (erased_chunks
.count(node_sw
) > 0) {
734 get_uncoupled_from_coupled(chunks
, x
, y
, z
, z_vec
, sc_size
);
740 return decode_uncoupled(erased_chunks
, z
, sc_size
);
743 int ErasureCodeClay::decode_uncoupled(const set
<int>& erased_chunks
, int z
, int sc_size
)
745 map
<int, bufferlist
> known_subchunks
;
746 map
<int, bufferlist
> all_subchunks
;
748 for (int i
= 0; i
< q
*t
; i
++) {
749 if (erased_chunks
.count(i
) == 0) {
750 known_subchunks
[i
].substr_of(U_buf
[i
], z
*sc_size
, sc_size
);
751 all_subchunks
[i
] = known_subchunks
[i
];
753 all_subchunks
[i
].substr_of(U_buf
[i
], z
*sc_size
, sc_size
);
755 all_subchunks
[i
].rebuild_aligned_size_and_memory(sc_size
, SIMD_ALIGN
);
756 assert(all_subchunks
[i
].is_contiguous());
759 mds
.erasure_code
->decode_chunks(erased_chunks
, known_subchunks
, &all_subchunks
);
763 void ErasureCodeClay::set_planes_sequential_decoding_order(int* order
, set
<int>& erasures
) {
765 for (int z
= 0; z
< sub_chunk_no
; z
++) {
766 get_plane_vector(z
,z_vec
);
768 for (auto i
: erasures
) {
769 if (i
% q
== z_vec
[i
/ q
]) {
770 order
[z
] = order
[z
] + 1;
776 void ErasureCodeClay::recover_type1_erasure(map
<int, bufferlist
>* chunks
,
778 int* z_vec
, int sc_size
)
780 set
<int> erased_chunks
;
783 int node_sw
= y
*q
+z_vec
[y
];
784 int z_sw
= z
+ (x
- z_vec
[y
]) * pow_int(q
,t
-1-y
);
786 map
<int, bufferlist
> known_subchunks
;
787 map
<int, bufferlist
> pftsubchunks
;
788 bufferptr
ptr(buffer::create_aligned(sc_size
, SIMD_ALIGN
));
791 int i0
= 0, i1
= 1, i2
= 2, i3
= 3;
799 erased_chunks
.insert(i0
);
800 pftsubchunks
[i0
].substr_of((*chunks
)[node_xy
], z
* sc_size
, sc_size
);
801 known_subchunks
[i1
].substr_of((*chunks
)[node_sw
], z_sw
* sc_size
, sc_size
);
802 known_subchunks
[i2
].substr_of(U_buf
[node_xy
], z
* sc_size
, sc_size
);
803 pftsubchunks
[i1
] = known_subchunks
[i1
];
804 pftsubchunks
[i2
] = known_subchunks
[i2
];
805 pftsubchunks
[i3
].push_back(ptr
);
807 for (int i
=0; i
<3; i
++) {
808 pftsubchunks
[i
].rebuild_aligned_size_and_memory(sc_size
, SIMD_ALIGN
);
811 pft
.erasure_code
->decode_chunks(erased_chunks
, known_subchunks
, &pftsubchunks
);
814 void ErasureCodeClay::get_coupled_from_uncoupled(map
<int, bufferlist
>* chunks
,
816 int* z_vec
, int sc_size
)
818 set
<int> erased_chunks
= {0, 1};
821 int node_sw
= y
*q
+z_vec
[y
];
822 int z_sw
= z
+ (x
- z_vec
[y
]) * pow_int(q
,t
-1-y
);
824 ceph_assert(z_vec
[y
] < x
);
825 map
<int, bufferlist
> uncoupled_subchunks
;
826 uncoupled_subchunks
[2].substr_of(U_buf
[node_xy
], z
* sc_size
, sc_size
);
827 uncoupled_subchunks
[3].substr_of(U_buf
[node_sw
], z_sw
* sc_size
, sc_size
);
829 map
<int, bufferlist
> pftsubchunks
;
830 pftsubchunks
[0].substr_of((*chunks
)[node_xy
], z
* sc_size
, sc_size
);
831 pftsubchunks
[1].substr_of((*chunks
)[node_sw
], z_sw
* sc_size
, sc_size
);
832 pftsubchunks
[2] = uncoupled_subchunks
[2];
833 pftsubchunks
[3] = uncoupled_subchunks
[3];
835 for (int i
=0; i
<3; i
++) {
836 pftsubchunks
[i
].rebuild_aligned_size_and_memory(sc_size
, SIMD_ALIGN
);
838 pft
.erasure_code
->decode_chunks(erased_chunks
, uncoupled_subchunks
, &pftsubchunks
);
841 void ErasureCodeClay::get_uncoupled_from_coupled(map
<int, bufferlist
>* chunks
,
843 int* z_vec
, int sc_size
)
845 set
<int> erased_chunks
= {2, 3};
848 int node_sw
= y
*q
+z_vec
[y
];
849 int z_sw
= z
+ (x
- z_vec
[y
]) * pow_int(q
,t
-1-y
);
851 int i0
= 0, i1
= 1, i2
= 2, i3
= 3;
858 map
<int, bufferlist
> coupled_subchunks
;
859 coupled_subchunks
[i0
].substr_of((*chunks
)[node_xy
], z
* sc_size
, sc_size
);
860 coupled_subchunks
[i1
].substr_of((*chunks
)[node_sw
], z_sw
* sc_size
, sc_size
);
862 map
<int, bufferlist
> pftsubchunks
;
863 pftsubchunks
[0] = coupled_subchunks
[0];
864 pftsubchunks
[1] = coupled_subchunks
[1];
865 pftsubchunks
[i2
].substr_of(U_buf
[node_xy
], z
* sc_size
, sc_size
);
866 pftsubchunks
[i3
].substr_of(U_buf
[node_sw
], z_sw
* sc_size
, sc_size
);
867 for (int i
=0; i
<3; i
++) {
868 pftsubchunks
[i
].rebuild_aligned_size_and_memory(sc_size
, SIMD_ALIGN
);
870 pft
.erasure_code
->decode_chunks(erased_chunks
, coupled_subchunks
, &pftsubchunks
);
873 int ErasureCodeClay::get_max_iscore(set
<int>& erased_chunks
)
877 memset(weight_vec
, 0, sizeof(int)*t
);
879 for (auto i
: erased_chunks
) {
880 if (weight_vec
[i
/ q
] == 0) {
881 weight_vec
[i
/ q
] = 1;
888 void ErasureCodeClay::get_plane_vector(int z
, int* z_vec
)
890 for (int i
= 0; i
< t
; i
++) {
891 z_vec
[t
-1-i
] = z
% q
;
892 z
= (z
- z_vec
[t
-1-i
]) / q
;