]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph distributed storage system | |
5 | * | |
6 | * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> | |
7 | * | |
8 | * Author: Loic Dachary <loic@dachary.org> | |
9 | * | |
10 | * This library is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | */ | |
16 | ||
17 | #ifndef CEPH_ERASURE_CODE_EXAMPLE_H | |
18 | #define CEPH_ERASURE_CODE_EXAMPLE_H | |
19 | ||
20 | #include <unistd.h> | |
21 | #include <errno.h> | |
22 | #include <algorithm> | |
23 | #include <sstream> | |
24 | ||
25 | #include "crush/CrushWrapper.h" | |
26 | #include "osd/osd_types.h" | |
27 | #include "erasure-code/ErasureCode.h" | |
28 | ||
29 | #define FIRST_DATA_CHUNK 0 | |
30 | #define SECOND_DATA_CHUNK 1 | |
31 | #define DATA_CHUNKS 2u | |
32 | ||
33 | #define CODING_CHUNK 2 | |
34 | #define CODING_CHUNKS 1u | |
35 | ||
36 | #define MINIMUM_TO_RECOVER 2u | |
37 | ||
38 | class ErasureCodeExample : public ErasureCode { | |
39 | public: | |
40 | ~ErasureCodeExample() override {} | |
41 | ||
224ce89b | 42 | int create_rule(const string &name, |
7c673cae FG |
43 | CrushWrapper &crush, |
44 | ostream *ss) const override { | |
224ce89b | 45 | return crush.add_simple_rule(name, "default", "host", "", |
31f18b77 | 46 | "indep", pg_pool_t::TYPE_ERASURE, ss); |
7c673cae FG |
47 | } |
48 | ||
49 | int minimum_to_decode(const set<int> &want_to_read, | |
50 | const set<int> &available_chunks, | |
51 | set<int> *minimum) override { | |
52 | if (includes(available_chunks.begin(), available_chunks.end(), | |
53 | want_to_read.begin(), want_to_read.end())) { | |
54 | *minimum = want_to_read; | |
55 | return 0; | |
56 | } else if (available_chunks.size() >= MINIMUM_TO_RECOVER) { | |
57 | *minimum = available_chunks; | |
58 | return 0; | |
59 | } else { | |
60 | return -EIO; | |
61 | } | |
62 | } | |
63 | ||
64 | int minimum_to_decode_with_cost(const set<int> &want_to_read, | |
65 | const map<int, int> &available, | |
66 | set<int> *minimum) override { | |
67 | // | |
68 | // If one chunk is more expensive to fetch than the others, | |
69 | // recover it instead. For instance, if the cost reflects the | |
70 | // time it takes for a chunk to be retrieved from a remote | |
71 | // OSD and if CPU is cheap, it could make sense to recover | |
72 | // instead of fetching the chunk. | |
73 | // | |
74 | map<int, int> c2c(available); | |
75 | if (c2c.size() > DATA_CHUNKS) { | |
76 | if (c2c[FIRST_DATA_CHUNK] > c2c[SECOND_DATA_CHUNK] && | |
77 | c2c[FIRST_DATA_CHUNK] > c2c[CODING_CHUNK]) | |
78 | c2c.erase(FIRST_DATA_CHUNK); | |
79 | else if(c2c[SECOND_DATA_CHUNK] > c2c[FIRST_DATA_CHUNK] && | |
80 | c2c[SECOND_DATA_CHUNK] > c2c[CODING_CHUNK]) | |
81 | c2c.erase(SECOND_DATA_CHUNK); | |
82 | else if(c2c[CODING_CHUNK] > c2c[FIRST_DATA_CHUNK] && | |
83 | c2c[CODING_CHUNK] > c2c[SECOND_DATA_CHUNK]) | |
84 | c2c.erase(CODING_CHUNK); | |
85 | } | |
86 | set <int> available_chunks; | |
87 | for (map<int, int>::const_iterator i = c2c.begin(); | |
88 | i != c2c.end(); | |
89 | ++i) | |
90 | available_chunks.insert(i->first); | |
91 | return minimum_to_decode(want_to_read, available_chunks, minimum); | |
92 | } | |
93 | ||
94 | unsigned int get_chunk_count() const override { | |
95 | return DATA_CHUNKS + CODING_CHUNKS; | |
96 | } | |
97 | ||
98 | unsigned int get_data_chunk_count() const override { | |
99 | return DATA_CHUNKS; | |
100 | } | |
101 | ||
102 | unsigned int get_chunk_size(unsigned int object_size) const override { | |
103 | return ( object_size / DATA_CHUNKS ) + 1; | |
104 | } | |
105 | ||
106 | int encode(const set<int> &want_to_encode, | |
107 | const bufferlist &in, | |
108 | map<int, bufferlist> *encoded) override { | |
109 | // | |
110 | // make sure all data chunks have the same length, allocating | |
111 | // padding if necessary. | |
112 | // | |
113 | unsigned int chunk_length = get_chunk_size(in.length()); | |
114 | bufferlist out(in); | |
115 | unsigned int width = get_chunk_count() * get_chunk_size(in.length()); | |
116 | bufferptr pad(width - in.length()); | |
117 | pad.zero(0, get_data_chunk_count()); | |
118 | out.push_back(pad); | |
119 | // | |
120 | // compute the coding chunk with first chunk ^ second chunk | |
121 | // | |
122 | char *p = out.c_str(); | |
123 | for (unsigned i = 0; i < chunk_length; i++) | |
124 | p[i + CODING_CHUNK * chunk_length] = | |
125 | p[i + FIRST_DATA_CHUNK * chunk_length] ^ | |
126 | p[i + SECOND_DATA_CHUNK * chunk_length]; | |
127 | // | |
128 | // populate the bufferlist with bufferptr pointing | |
129 | // to chunk boundaries | |
130 | // | |
131 | const bufferptr &ptr = out.front(); | |
132 | for (set<int>::iterator j = want_to_encode.begin(); | |
133 | j != want_to_encode.end(); | |
134 | ++j) { | |
135 | bufferptr chunk(ptr, (*j) * chunk_length, chunk_length); | |
136 | (*encoded)[*j].push_front(chunk); | |
137 | } | |
138 | return 0; | |
139 | } | |
140 | ||
141 | int encode_chunks(const set<int> &want_to_encode, | |
142 | map<int, bufferlist> *encoded) override { | |
143 | ceph_abort(); | |
144 | return 0; | |
145 | } | |
146 | ||
147 | int decode(const set<int> &want_to_read, | |
148 | const map<int, bufferlist> &chunks, | |
149 | map<int, bufferlist> *decoded) override { | |
150 | // | |
151 | // All chunks have the same size | |
152 | // | |
153 | unsigned chunk_length = (*chunks.begin()).second.length(); | |
154 | for (set<int>::iterator i = want_to_read.begin(); | |
155 | i != want_to_read.end(); | |
156 | ++i) { | |
157 | if (chunks.find(*i) != chunks.end()) { | |
158 | // | |
159 | // If the chunk is available, just copy the bufferptr pointer | |
160 | // to the decoded argument. | |
161 | // | |
162 | (*decoded)[*i] = chunks.find(*i)->second; | |
163 | } else if(chunks.size() != 2) { | |
164 | // | |
165 | // If a chunk is missing and there are not enough chunks | |
166 | // to recover, abort. | |
167 | // | |
168 | return -ERANGE; | |
169 | } else { | |
170 | // | |
171 | // No matter what the missing chunk is, XOR of the other | |
172 | // two recovers it. | |
173 | // | |
174 | map<int, bufferlist>::const_iterator k = chunks.begin(); | |
175 | const char *a = k->second.front().c_str(); | |
176 | ++k; | |
177 | const char *b = k->second.front().c_str(); | |
178 | bufferptr chunk(chunk_length); | |
179 | char *c = chunk.c_str(); | |
180 | for (unsigned j = 0; j < chunk_length; j++) { | |
181 | c[j] = a[j] ^ b[j]; | |
182 | } | |
183 | (*decoded)[*i].push_front(chunk); | |
184 | } | |
185 | } | |
186 | return 0; | |
187 | } | |
188 | ||
189 | int decode_chunks(const set<int> &want_to_read, | |
190 | const map<int, bufferlist> &chunks, | |
191 | map<int, bufferlist> *decoded) override { | |
192 | ceph_abort(); | |
193 | return 0; | |
194 | } | |
195 | ||
196 | const vector<int> &get_chunk_mapping() const override { | |
197 | static vector<int> mapping; | |
198 | return mapping; | |
199 | } | |
200 | ||
201 | }; | |
202 | ||
203 | #endif |