]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "include/types.h" | |
16 | #include "include/buffer.h" | |
17 | #include "osd/osd_types.h" | |
18 | #include <errno.h> | |
19 | ||
20 | #include "HashIndex.h" | |
21 | ||
22 | #include "common/errno.h" | |
23 | #include "common/debug.h" | |
24 | #define dout_context cct | |
25 | #define dout_subsys ceph_subsys_filestore | |
26 | ||
27 | const string HashIndex::SUBDIR_ATTR = "contents"; | |
28 | const string HashIndex::IN_PROGRESS_OP_TAG = "in_progress_op"; | |
29 | ||
30 | /// hex digit to integer value | |
31 | int hex_to_int(char c) | |
32 | { | |
33 | if (c >= '0' && c <= '9') | |
34 | return c - '0'; | |
35 | if (c >= 'A' && c <= 'F') | |
36 | return c - 'A' + 10; | |
37 | ceph_abort(); | |
38 | } | |
39 | ||
/// Convert an integer in [0, 15] to its uppercase hex digit.
/// Values outside that range (including negatives, which previously slipped
/// past the assertion and produced a non-hex character) trip the assert.
char int_to_hex(int v)
{
  assert(v >= 0 && v < 16);
  if (v < 10)
    return '0' + v;
  return 'A' + v - 10;
}
48 | ||
/// Mirror the four low bits of a nibble (bit 0 <-> bit 3, bit 1 <-> bit 2).
/// The input is asserted to be below 16.
int reverse_nibble_bits(int in)
{
  assert(in < 16);
  int mirrored = 0;
  for (int bit = 0; bit < 4; ++bit) {
    if (in & (1 << bit))
      mirrored |= 1 << (3 - bit);
  }
  return mirrored;
}
59 | ||
60 | /// reverse nibble bits in a hex digit | |
61 | char reverse_hexdigit_bits(char c) | |
62 | { | |
63 | return int_to_hex(reverse_nibble_bits(hex_to_int(c))); | |
64 | } | |
65 | ||
66 | /// reverse nibble bits in a hex string | |
67 | string reverse_hexdigit_bits_string(string s) | |
68 | { | |
69 | for (unsigned i=0; i<s.size(); ++i) | |
70 | s[i] = reverse_hexdigit_bits(s[i]); | |
71 | return s; | |
72 | } | |
73 | ||
74 | /// compare hex digit (as length 1 string) bitwise | |
75 | bool cmp_hexdigit_bitwise(const string& l, const string& r) | |
76 | { | |
77 | assert(l.length() == 1 && r.length() == 1); | |
78 | int lv = hex_to_int(l[0]); | |
79 | int rv = hex_to_int(r[0]); | |
80 | assert(lv < 16); | |
81 | assert(rv < 16); | |
82 | return reverse_nibble_bits(lv) < reverse_nibble_bits(rv); | |
83 | } | |
84 | ||
85 | /// compare hex digit string bitwise | |
86 | bool cmp_hexdigit_string_bitwise(const string& l, const string& r) | |
87 | { | |
88 | string ll = reverse_hexdigit_bits_string(l); | |
89 | string rr = reverse_hexdigit_bits_string(r); | |
90 | return ll < rr; | |
91 | } | |
92 | ||
93 | int HashIndex::cleanup() { | |
94 | bufferlist bl; | |
95 | int r = get_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); | |
96 | if (r < 0) { | |
97 | // No in progress operations! | |
98 | return 0; | |
99 | } | |
100 | bufferlist::iterator i = bl.begin(); | |
101 | InProgressOp in_progress(i); | |
102 | subdir_info_s info; | |
103 | r = get_info(in_progress.path, &info); | |
104 | if (r == -ENOENT) { | |
105 | return end_split_or_merge(in_progress.path); | |
106 | } else if (r < 0) { | |
107 | return r; | |
108 | } | |
109 | ||
110 | if (in_progress.is_split()) | |
111 | return complete_split(in_progress.path, info); | |
112 | else if (in_progress.is_merge()) | |
113 | return complete_merge(in_progress.path, info); | |
114 | else if (in_progress.is_col_split()) { | |
115 | for (vector<string>::iterator i = in_progress.path.begin(); | |
116 | i != in_progress.path.end(); | |
117 | ++i) { | |
118 | vector<string> path(in_progress.path.begin(), i); | |
119 | int r = reset_attr(path); | |
120 | if (r < 0) | |
121 | return r; | |
122 | } | |
123 | return 0; | |
124 | } | |
125 | else | |
126 | return -EINVAL; | |
127 | } | |
128 | ||
129 | int HashIndex::reset_attr( | |
130 | const vector<string> &path) | |
131 | { | |
132 | int exists = 0; | |
133 | int r = path_exists(path, &exists); | |
134 | if (r < 0) | |
135 | return r; | |
136 | if (!exists) | |
137 | return 0; | |
138 | map<string, ghobject_t> objects; | |
139 | vector<string> subdirs; | |
140 | r = list_objects(path, 0, 0, &objects); | |
141 | if (r < 0) | |
142 | return r; | |
143 | r = list_subdirs(path, &subdirs); | |
144 | if (r < 0) | |
145 | return r; | |
146 | ||
147 | subdir_info_s info; | |
148 | info.hash_level = path.size(); | |
149 | info.objs = objects.size(); | |
150 | info.subdirs = subdirs.size(); | |
151 | return set_info(path, info); | |
152 | } | |
153 | ||
154 | int HashIndex::col_split_level( | |
155 | HashIndex &from, | |
156 | HashIndex &to, | |
157 | const vector<string> &path, | |
158 | uint32_t inbits, | |
159 | uint32_t match, | |
160 | unsigned *mkdirred) | |
161 | { | |
162 | /* For each subdir, move, recurse, or ignore based on comparing the low order | |
163 | * bits of the hash represented by the subdir path with inbits, match passed | |
164 | * in. | |
165 | */ | |
166 | vector<string> subdirs; | |
167 | int r = from.list_subdirs(path, &subdirs); | |
168 | if (r < 0) | |
169 | return r; | |
170 | map<string, ghobject_t> objects; | |
171 | r = from.list_objects(path, 0, 0, &objects); | |
172 | if (r < 0) | |
173 | return r; | |
174 | ||
175 | set<string> to_move; | |
176 | for (vector<string>::iterator i = subdirs.begin(); | |
177 | i != subdirs.end(); | |
178 | ++i) { | |
179 | uint32_t bits = 0; | |
180 | uint32_t hash = 0; | |
181 | vector<string> sub_path(path.begin(), path.end()); | |
182 | sub_path.push_back(*i); | |
183 | path_to_hobject_hash_prefix(sub_path, &bits, &hash); | |
184 | if (bits < inbits) { | |
185 | if (hobject_t::match_hash(hash, bits, match)) { | |
186 | r = col_split_level( | |
187 | from, | |
188 | to, | |
189 | sub_path, | |
190 | inbits, | |
191 | match, | |
192 | mkdirred); | |
193 | if (r < 0) | |
194 | return r; | |
195 | if (*mkdirred > path.size()) | |
196 | *mkdirred = path.size(); | |
197 | } // else, skip, doesn't need to be moved or recursed into | |
198 | } else { | |
199 | if (hobject_t::match_hash(hash, inbits, match)) { | |
200 | to_move.insert(*i); | |
201 | } | |
202 | } // else, skip, doesn't need to be moved or recursed into | |
203 | } | |
204 | ||
205 | /* Then, do the same for each object */ | |
206 | map<string, ghobject_t> objs_to_move; | |
207 | for (map<string, ghobject_t>::iterator i = objects.begin(); | |
208 | i != objects.end(); | |
209 | ++i) { | |
210 | if (i->second.match(inbits, match)) { | |
211 | objs_to_move.insert(*i); | |
212 | } | |
213 | } | |
214 | ||
215 | if (objs_to_move.empty() && to_move.empty()) | |
216 | return 0; | |
217 | ||
218 | // Make parent directories as needed | |
219 | while (*mkdirred < path.size()) { | |
220 | ++*mkdirred; | |
221 | int exists = 0; | |
222 | vector<string> creating_path(path.begin(), path.begin()+*mkdirred); | |
223 | r = to.path_exists(creating_path, &exists); | |
224 | if (r < 0) | |
225 | return r; | |
226 | if (exists) | |
227 | continue; | |
228 | subdir_info_s info; | |
229 | info.objs = 0; | |
230 | info.subdirs = 0; | |
231 | info.hash_level = creating_path.size(); | |
232 | if (*mkdirred < path.size() - 1) | |
233 | info.subdirs = 1; | |
234 | r = to.start_col_split(creating_path); | |
235 | if (r < 0) | |
236 | return r; | |
237 | r = to.create_path(creating_path); | |
238 | if (r < 0) | |
239 | return r; | |
240 | r = to.set_info(creating_path, info); | |
241 | if (r < 0) | |
242 | return r; | |
243 | r = to.end_split_or_merge(creating_path); | |
244 | if (r < 0) | |
245 | return r; | |
246 | } | |
247 | ||
248 | subdir_info_s from_info; | |
249 | subdir_info_s to_info; | |
250 | r = from.get_info(path, &from_info); | |
251 | if (r < 0) | |
252 | return r; | |
253 | r = to.get_info(path, &to_info); | |
254 | if (r < 0) | |
255 | return r; | |
256 | ||
257 | from.start_col_split(path); | |
258 | to.start_col_split(path); | |
259 | ||
260 | // Do subdir moves | |
261 | for (set<string>::iterator i = to_move.begin(); | |
262 | i != to_move.end(); | |
263 | ++i) { | |
264 | from_info.subdirs--; | |
265 | to_info.subdirs++; | |
266 | r = move_subdir(from, to, path, *i); | |
267 | if (r < 0) | |
268 | return r; | |
269 | } | |
270 | ||
271 | for (map<string, ghobject_t>::iterator i = objs_to_move.begin(); | |
272 | i != objs_to_move.end(); | |
273 | ++i) { | |
274 | from_info.objs--; | |
275 | to_info.objs++; | |
276 | r = move_object(from, to, path, *i); | |
277 | if (r < 0) | |
278 | return r; | |
279 | } | |
280 | ||
281 | ||
282 | r = to.set_info(path, to_info); | |
283 | if (r < 0) | |
284 | return r; | |
285 | r = from.set_info(path, from_info); | |
286 | if (r < 0) | |
287 | return r; | |
288 | from.end_split_or_merge(path); | |
289 | to.end_split_or_merge(path); | |
290 | return 0; | |
291 | } | |
292 | ||
293 | int HashIndex::_split( | |
294 | uint32_t match, | |
295 | uint32_t bits, | |
296 | CollectionIndex* dest) { | |
297 | assert(collection_version() == dest->collection_version()); | |
298 | unsigned mkdirred = 0; | |
299 | return col_split_level( | |
300 | *this, | |
301 | *static_cast<HashIndex*>(dest), | |
302 | vector<string>(), | |
303 | bits, | |
304 | match, | |
305 | &mkdirred); | |
306 | } | |
307 | ||
308 | int HashIndex::split_dirs(const vector<string> &path) { | |
309 | dout(20) << __func__ << " " << path << dendl; | |
310 | subdir_info_s info; | |
311 | int r = get_info(path, &info); | |
312 | if (r < 0) { | |
313 | dout(10) << "error looking up info for " << path << ": " | |
314 | << cpp_strerror(r) << dendl; | |
315 | return r; | |
316 | } | |
317 | ||
318 | if (must_split(info)) { | |
319 | dout(1) << __func__ << " " << path << " has " << info.objs | |
320 | << " objects, starting split." << dendl; | |
321 | r = initiate_split(path, info); | |
322 | if (r < 0) { | |
323 | dout(10) << "error initiating split on " << path << ": " | |
324 | << cpp_strerror(r) << dendl; | |
325 | return r; | |
326 | } | |
327 | ||
328 | r = complete_split(path, info); | |
329 | dout(1) << __func__ << " " << path << " split completed." | |
330 | << dendl; | |
331 | if (r < 0) { | |
332 | dout(10) << "error completing split on " << path << ": " | |
333 | << cpp_strerror(r) << dendl; | |
334 | return r; | |
335 | } | |
336 | } | |
337 | ||
338 | vector<string> subdirs; | |
339 | r = list_subdirs(path, &subdirs); | |
340 | if (r < 0) { | |
341 | dout(10) << "error listing subdirs of " << path << ": " | |
342 | << cpp_strerror(r) << dendl; | |
343 | return r; | |
344 | } | |
345 | for (vector<string>::const_iterator it = subdirs.begin(); | |
346 | it != subdirs.end(); ++it) { | |
347 | vector<string> subdir_path(path); | |
348 | subdir_path.push_back(*it); | |
349 | r = split_dirs(subdir_path); | |
350 | if (r < 0) { | |
351 | return r; | |
352 | } | |
353 | } | |
354 | ||
355 | return r; | |
356 | } | |
357 | ||
358 | int HashIndex::apply_layout_settings() { | |
359 | vector<string> path; | |
360 | dout(10) << __func__ << " split multiple = " << split_multiplier | |
361 | << " merge threshold = " << merge_threshold << dendl; | |
362 | return split_dirs(path); | |
363 | } | |
364 | ||
365 | int HashIndex::_init() { | |
366 | subdir_info_s info; | |
367 | vector<string> path; | |
368 | return set_info(path, info); | |
369 | } | |
370 | ||
371 | /* LFNIndex virtual method implementations */ | |
372 | int HashIndex::_created(const vector<string> &path, | |
373 | const ghobject_t &oid, | |
374 | const string &mangled_name) { | |
375 | subdir_info_s info; | |
376 | int r; | |
377 | r = get_info(path, &info); | |
378 | if (r < 0) | |
379 | return r; | |
380 | info.objs++; | |
381 | r = set_info(path, info); | |
382 | if (r < 0) | |
383 | return r; | |
384 | ||
385 | if (must_split(info)) { | |
386 | dout(1) << __func__ << " " << path << " has " << info.objs | |
387 | << " objects, starting split." << dendl; | |
388 | int r = initiate_split(path, info); | |
389 | if (r < 0) | |
390 | return r; | |
391 | r = complete_split(path, info); | |
392 | dout(1) << __func__ << " " << path << " split completed." | |
393 | << dendl; | |
394 | return r; | |
395 | } else { | |
396 | return 0; | |
397 | } | |
398 | } | |
399 | ||
400 | int HashIndex::_remove(const vector<string> &path, | |
401 | const ghobject_t &oid, | |
402 | const string &mangled_name) { | |
403 | int r; | |
404 | r = remove_object(path, oid); | |
405 | if (r < 0) | |
406 | return r; | |
407 | subdir_info_s info; | |
408 | r = get_info(path, &info); | |
409 | if (r < 0) | |
410 | return r; | |
411 | info.objs--; | |
412 | r = set_info(path, info); | |
413 | if (r < 0) | |
414 | return r; | |
415 | if (must_merge(info)) { | |
416 | r = initiate_merge(path, info); | |
417 | if (r < 0) | |
418 | return r; | |
419 | return complete_merge(path, info); | |
420 | } else { | |
421 | return 0; | |
422 | } | |
423 | } | |
424 | ||
425 | int HashIndex::_lookup(const ghobject_t &oid, | |
426 | vector<string> *path, | |
427 | string *mangled_name, | |
428 | int *hardlink) { | |
429 | vector<string> path_comp; | |
430 | get_path_components(oid, &path_comp); | |
431 | vector<string>::iterator next = path_comp.begin(); | |
432 | int exists; | |
433 | while (1) { | |
434 | int r = path_exists(*path, &exists); | |
435 | if (r < 0) | |
436 | return r; | |
437 | if (!exists) { | |
438 | if (path->empty()) | |
439 | return -ENOENT; | |
440 | path->pop_back(); | |
441 | break; | |
442 | } | |
443 | if (next == path_comp.end()) | |
444 | break; | |
445 | path->push_back(*(next++)); | |
446 | } | |
447 | return get_mangled_name(*path, oid, mangled_name, hardlink); | |
448 | } | |
449 | ||
450 | int HashIndex::_collection_list_partial(const ghobject_t &start, | |
451 | const ghobject_t &end, | |
452 | int max_count, | |
453 | vector<ghobject_t> *ls, | |
454 | ghobject_t *next) { | |
455 | vector<string> path; | |
456 | ghobject_t _next; | |
457 | if (!next) | |
458 | next = &_next; | |
459 | *next = start; | |
460 | dout(20) << __func__ << " start:" << start << " end:" << end << "-" << max_count << " ls.size " << ls->size() << dendl; | |
461 | return list_by_hash(path, end, max_count, next, ls); | |
462 | } | |
463 | ||
464 | int HashIndex::prep_delete() { | |
465 | return recursive_remove(vector<string>()); | |
466 | } | |
467 | ||
468 | int HashIndex::_pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs) { | |
469 | int ret; | |
470 | vector<string> path; | |
471 | subdir_info_s root_info; | |
472 | // Make sure there is neither objects nor sub-folders | |
473 | // in this collection | |
474 | ret = get_info(path, &root_info); | |
475 | if (ret < 0) | |
476 | return ret; | |
477 | ||
478 | // Do the folder splitting first | |
479 | ret = pre_split_folder(pg_num, expected_num_objs); | |
480 | if (ret < 0) | |
481 | return ret; | |
482 | // Initialize the folder info starting from root | |
483 | return init_split_folder(path, 0); | |
484 | } | |
485 | ||
486 | int HashIndex::pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs) | |
487 | { | |
488 | // If folder merging is enabled (by setting the threshold positive), | |
489 | // no need to split | |
490 | if (merge_threshold > 0) | |
491 | return 0; | |
492 | const coll_t c = coll(); | |
493 | // Do not split if the expected number of objects in this collection is zero (by default) | |
494 | if (expected_num_objs == 0) | |
495 | return 0; | |
496 | ||
497 | // Calculate the number of leaf folders (which actually store files) | |
498 | // need to be created | |
499 | const uint64_t objs_per_folder = (uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier * 16; | |
500 | uint64_t leavies = expected_num_objs / objs_per_folder ; | |
501 | // No need to split | |
502 | if (leavies == 0 || expected_num_objs == objs_per_folder) | |
503 | return 0; | |
504 | ||
505 | spg_t spgid; | |
506 | if (!c.is_pg_prefix(&spgid)) | |
507 | return -EINVAL; | |
508 | const ps_t ps = spgid.pgid.ps(); | |
509 | ||
510 | // the most significant bits of pg_num | |
511 | const int pg_num_bits = calc_num_bits(pg_num - 1); | |
512 | ps_t tmp_id = ps; | |
513 | // calculate the number of levels we only create one sub folder | |
514 | int num = pg_num_bits / 4; | |
515 | // pg num's hex value is like 1xxx,xxxx,xxxx but not 1111,1111,1111, | |
516 | // so that splitting starts at level 3 | |
517 | if (pg_num_bits % 4 == 0 && pg_num < ((uint32_t)1 << pg_num_bits)) { | |
518 | --num; | |
519 | } | |
520 | ||
521 | int ret; | |
522 | // Start with creation that only has one subfolder | |
523 | vector<string> paths; | |
524 | int dump_num = num; | |
525 | while (num-- > 0) { | |
526 | ps_t v = tmp_id & 0x0000000f; | |
527 | paths.push_back(to_hex(v)); | |
528 | ret = create_path(paths); | |
529 | if (ret < 0 && ret != -EEXIST) | |
530 | return ret; | |
531 | tmp_id = tmp_id >> 4; | |
532 | } | |
533 | ||
534 | // Starting from here, we can split by creating multiple subfolders | |
535 | const int left_bits = pg_num_bits - dump_num * 4; | |
536 | // this variable denotes how many bits (for this level) that can be | |
537 | // used for sub folder splitting | |
538 | int split_bits = 4 - left_bits; | |
539 | // the below logic is inspired by rados.h#ceph_stable_mod, | |
540 | // it basically determines how many sub-folders should we | |
541 | // create for splitting | |
542 | assert(pg_num_bits > 0); // otherwise BAD_SHIFT | |
543 | if (((1 << (pg_num_bits - 1)) | ps) >= pg_num) { | |
544 | ++split_bits; | |
545 | } | |
546 | const uint32_t subs = (1 << split_bits); | |
547 | // Calculate how many levels we create starting from here | |
548 | int level = 0; | |
549 | leavies /= subs; | |
550 | while (leavies > 1) { | |
551 | ++level; | |
552 | leavies = leavies >> 4; | |
553 | } | |
554 | for (uint32_t i = 0; i < subs; ++i) { | |
555 | assert(split_bits <= 4); // otherwise BAD_SHIFT | |
556 | int v = tmp_id | (i << ((4 - split_bits) % 4)); | |
557 | paths.push_back(to_hex(v)); | |
558 | ret = create_path(paths); | |
559 | if (ret < 0 && ret != -EEXIST) | |
560 | return ret; | |
561 | ret = recursive_create_path(paths, level); | |
562 | if (ret < 0) | |
563 | return ret; | |
564 | paths.pop_back(); | |
565 | } | |
566 | return 0; | |
567 | } | |
568 | ||
569 | int HashIndex::init_split_folder(vector<string> &path, uint32_t hash_level) | |
570 | { | |
571 | // Get the number of sub directories for the current path | |
572 | vector<string> subdirs; | |
573 | int ret = list_subdirs(path, &subdirs); | |
574 | if (ret < 0) | |
575 | return ret; | |
576 | subdir_info_s info; | |
577 | info.subdirs = subdirs.size(); | |
578 | info.hash_level = hash_level; | |
579 | ret = set_info(path, info); | |
580 | if (ret < 0) | |
581 | return ret; | |
582 | ret = fsync_dir(path); | |
583 | if (ret < 0) | |
584 | return ret; | |
585 | ||
586 | // Do the same for subdirs | |
587 | vector<string>::const_iterator iter; | |
588 | for (iter = subdirs.begin(); iter != subdirs.end(); ++iter) { | |
589 | path.push_back(*iter); | |
590 | ret = init_split_folder(path, hash_level + 1); | |
591 | if (ret < 0) | |
592 | return ret; | |
593 | path.pop_back(); | |
594 | } | |
595 | return 0; | |
596 | } | |
597 | ||
598 | int HashIndex::recursive_create_path(vector<string>& path, int level) | |
599 | { | |
600 | if (level == 0) | |
601 | return 0; | |
602 | for (int i = 0; i < 16; ++i) { | |
603 | path.push_back(to_hex(i)); | |
604 | int ret = create_path(path); | |
605 | if (ret < 0 && ret != -EEXIST) | |
606 | return ret; | |
607 | ret = recursive_create_path(path, level - 1); | |
608 | if (ret < 0) | |
609 | return ret; | |
610 | path.pop_back(); | |
611 | } | |
612 | return 0; | |
613 | } | |
614 | ||
615 | int HashIndex::recursive_remove(const vector<string> &path) { | |
616 | return _recursive_remove(path, true); | |
617 | } | |
618 | ||
619 | int HashIndex::_recursive_remove(const vector<string> &path, bool top) { | |
620 | vector<string> subdirs; | |
621 | dout(20) << __func__ << " path=" << path << dendl; | |
622 | int r = list_subdirs(path, &subdirs); | |
623 | if (r < 0) | |
624 | return r; | |
625 | map<string, ghobject_t> objects; | |
626 | r = list_objects(path, 0, 0, &objects); | |
627 | if (r < 0) | |
628 | return r; | |
629 | if (!objects.empty()) | |
630 | return -ENOTEMPTY; | |
631 | vector<string> subdir(path); | |
632 | for (vector<string>::iterator i = subdirs.begin(); | |
633 | i != subdirs.end(); | |
634 | ++i) { | |
635 | subdir.push_back(*i); | |
636 | r = _recursive_remove(subdir, false); | |
637 | if (r < 0) | |
638 | return r; | |
639 | subdir.pop_back(); | |
640 | } | |
641 | if (top) | |
642 | return 0; | |
643 | else | |
644 | return remove_path(path); | |
645 | } | |
646 | ||
647 | int HashIndex::start_col_split(const vector<string> &path) { | |
648 | bufferlist bl; | |
649 | InProgressOp op_tag(InProgressOp::COL_SPLIT, path); | |
650 | op_tag.encode(bl); | |
651 | int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); | |
652 | if (r < 0) | |
653 | return r; | |
654 | return fsync_dir(vector<string>()); | |
655 | } | |
656 | ||
657 | int HashIndex::start_split(const vector<string> &path) { | |
658 | bufferlist bl; | |
659 | InProgressOp op_tag(InProgressOp::SPLIT, path); | |
660 | op_tag.encode(bl); | |
661 | int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); | |
662 | if (r < 0) | |
663 | return r; | |
664 | return fsync_dir(vector<string>()); | |
665 | } | |
666 | ||
667 | int HashIndex::start_merge(const vector<string> &path) { | |
668 | bufferlist bl; | |
669 | InProgressOp op_tag(InProgressOp::MERGE, path); | |
670 | op_tag.encode(bl); | |
671 | int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); | |
672 | if (r < 0) | |
673 | return r; | |
674 | return fsync_dir(vector<string>()); | |
675 | } | |
676 | ||
677 | int HashIndex::end_split_or_merge(const vector<string> &path) { | |
678 | return remove_attr_path(vector<string>(), IN_PROGRESS_OP_TAG); | |
679 | } | |
680 | ||
681 | int HashIndex::get_info(const vector<string> &path, subdir_info_s *info) { | |
682 | bufferlist buf; | |
683 | int r = get_attr_path(path, SUBDIR_ATTR, buf); | |
684 | if (r < 0) | |
685 | return r; | |
686 | bufferlist::iterator bufiter = buf.begin(); | |
687 | info->decode(bufiter); | |
688 | assert(path.size() == (unsigned)info->hash_level); | |
689 | return 0; | |
690 | } | |
691 | ||
692 | int HashIndex::set_info(const vector<string> &path, const subdir_info_s &info) { | |
693 | bufferlist buf; | |
694 | assert(path.size() == (unsigned)info.hash_level); | |
695 | info.encode(buf); | |
696 | return add_attr_path(path, SUBDIR_ATTR, buf); | |
697 | } | |
698 | ||
699 | bool HashIndex::must_merge(const subdir_info_s &info) { | |
700 | return (info.hash_level > 0 && | |
701 | merge_threshold > 0 && | |
702 | info.objs < (unsigned)merge_threshold && | |
703 | info.subdirs == 0); | |
704 | } | |
705 | ||
706 | bool HashIndex::must_split(const subdir_info_s &info) { | |
707 | return (info.hash_level < (unsigned)MAX_HASH_LEVEL && | |
708 | info.objs > ((unsigned)(abs(merge_threshold)) * 16 * split_multiplier)); | |
709 | ||
710 | } | |
711 | ||
712 | int HashIndex::initiate_merge(const vector<string> &path, subdir_info_s info) { | |
713 | return start_merge(path); | |
714 | } | |
715 | ||
716 | int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) { | |
717 | vector<string> dst = path; | |
718 | dst.pop_back(); | |
719 | subdir_info_s dstinfo; | |
720 | int r, exists; | |
721 | r = path_exists(path, &exists); | |
722 | if (r < 0) | |
723 | return r; | |
724 | r = get_info(dst, &dstinfo); | |
725 | if (r < 0) | |
726 | return r; | |
727 | if (exists) { | |
728 | r = move_objects(path, dst); | |
729 | if (r < 0) | |
730 | return r; | |
731 | r = reset_attr(dst); | |
732 | if (r < 0) | |
733 | return r; | |
734 | r = remove_path(path); | |
735 | if (r < 0) | |
736 | return r; | |
737 | } | |
738 | if (must_merge(dstinfo)) { | |
739 | r = initiate_merge(dst, dstinfo); | |
740 | if (r < 0) | |
741 | return r; | |
742 | r = fsync_dir(dst); | |
743 | if (r < 0) | |
744 | return r; | |
745 | return complete_merge(dst, dstinfo); | |
746 | } | |
747 | r = fsync_dir(dst); | |
748 | if (r < 0) | |
749 | return r; | |
750 | return end_split_or_merge(path); | |
751 | } | |
752 | ||
753 | int HashIndex::initiate_split(const vector<string> &path, subdir_info_s info) { | |
754 | return start_split(path); | |
755 | } | |
756 | ||
757 | int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { | |
758 | int level = info.hash_level; | |
759 | map<string, ghobject_t> objects; | |
760 | vector<string> dst = path; | |
761 | int r; | |
762 | dst.push_back(""); | |
763 | r = list_objects(path, 0, 0, &objects); | |
764 | if (r < 0) | |
765 | return r; | |
766 | vector<string> subdirs_vec; | |
767 | r = list_subdirs(path, &subdirs_vec); | |
768 | if (r < 0) | |
769 | return r; | |
770 | set<string> subdirs; | |
771 | subdirs.insert(subdirs_vec.begin(), subdirs_vec.end()); | |
772 | map<string, map<string, ghobject_t> > mapped; | |
773 | map<string, ghobject_t> moved; | |
774 | int num_moved = 0; | |
775 | for (map<string, ghobject_t>::iterator i = objects.begin(); | |
776 | i != objects.end(); | |
777 | ++i) { | |
778 | vector<string> new_path; | |
779 | get_path_components(i->second, &new_path); | |
780 | mapped[new_path[level]][i->first] = i->second; | |
781 | } | |
782 | for (map<string, map<string, ghobject_t> >::iterator i = mapped.begin(); | |
783 | i != mapped.end(); | |
784 | ) { | |
785 | dst[level] = i->first; | |
786 | /* If the info already exists, it must be correct, | |
787 | * we may be picking up a partially finished split */ | |
788 | subdir_info_s temp; | |
789 | // subdir has already been fully copied | |
790 | if (subdirs.count(i->first) && !get_info(dst, &temp)) { | |
791 | for (map<string, ghobject_t>::iterator j = i->second.begin(); | |
792 | j != i->second.end(); | |
793 | ++j) { | |
794 | moved[j->first] = j->second; | |
795 | num_moved++; | |
796 | objects.erase(j->first); | |
797 | } | |
798 | ++i; | |
799 | continue; | |
800 | } | |
801 | ||
802 | subdir_info_s info_new; | |
803 | info_new.objs = i->second.size(); | |
804 | info_new.subdirs = 0; | |
805 | info_new.hash_level = level + 1; | |
806 | if (must_merge(info_new) && !subdirs.count(i->first)) { | |
807 | mapped.erase(i++); | |
808 | continue; | |
809 | } | |
810 | ||
811 | // Subdir doesn't yet exist | |
812 | if (!subdirs.count(i->first)) { | |
813 | info.subdirs += 1; | |
814 | r = create_path(dst); | |
815 | if (r < 0) | |
816 | return r; | |
817 | } // else subdir has been created but only partially copied | |
818 | ||
819 | for (map<string, ghobject_t>::iterator j = i->second.begin(); | |
820 | j != i->second.end(); | |
821 | ++j) { | |
822 | moved[j->first] = j->second; | |
823 | num_moved++; | |
824 | objects.erase(j->first); | |
825 | r = link_object(path, dst, j->second, j->first); | |
826 | // May be a partially finished split | |
827 | if (r < 0 && r != -EEXIST) { | |
828 | return r; | |
829 | } | |
830 | } | |
831 | ||
832 | r = fsync_dir(dst); | |
833 | if (r < 0) | |
834 | return r; | |
835 | ||
836 | // Presence of info must imply that all objects have been copied | |
837 | r = set_info(dst, info_new); | |
838 | if (r < 0) | |
839 | return r; | |
840 | ||
841 | r = fsync_dir(dst); | |
842 | if (r < 0) | |
843 | return r; | |
844 | ||
845 | ++i; | |
846 | } | |
847 | r = remove_objects(path, moved, &objects); | |
848 | if (r < 0) | |
849 | return r; | |
850 | info.objs = objects.size(); | |
851 | r = reset_attr(path); | |
852 | if (r < 0) | |
853 | return r; | |
854 | r = fsync_dir(path); | |
855 | if (r < 0) | |
856 | return r; | |
857 | return end_split_or_merge(path); | |
858 | } | |
859 | ||
860 | void HashIndex::get_path_components(const ghobject_t &oid, | |
861 | vector<string> *path) { | |
862 | char buf[MAX_HASH_LEVEL + 1]; | |
863 | snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)oid.hobj.get_nibblewise_key()); | |
864 | ||
865 | // Path components are the hex characters of oid.hobj.hash, least | |
866 | // significant first | |
867 | for (int i = 0; i < MAX_HASH_LEVEL; ++i) { | |
868 | path->push_back(string(&buf[i], 1)); | |
869 | } | |
870 | } | |
871 | ||
872 | string HashIndex::get_hash_str(uint32_t hash) { | |
873 | char buf[MAX_HASH_LEVEL + 1]; | |
874 | snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hash); | |
875 | string retval; | |
876 | for (int i = 0; i < MAX_HASH_LEVEL; ++i) { | |
877 | retval.push_back(buf[MAX_HASH_LEVEL - 1 - i]); | |
878 | } | |
879 | return retval; | |
880 | } | |
881 | ||
882 | string HashIndex::get_path_str(const ghobject_t &oid) { | |
883 | assert(!oid.is_max()); | |
884 | return get_hash_str(oid.hobj.get_hash()); | |
885 | } | |
886 | ||
887 | uint32_t HashIndex::hash_prefix_to_hash(string prefix) { | |
888 | while (prefix.size() < sizeof(uint32_t) * 2) { | |
889 | prefix.push_back('0'); | |
890 | } | |
891 | uint32_t hash; | |
892 | sscanf(prefix.c_str(), "%x", &hash); | |
893 | // nibble reverse | |
894 | hash = ((hash & 0x0f0f0f0f) << 4) | ((hash & 0xf0f0f0f0) >> 4); | |
895 | hash = ((hash & 0x00ff00ff) << 8) | ((hash & 0xff00ff00) >> 8); | |
896 | hash = ((hash & 0x0000ffff) << 16) | ((hash & 0xffff0000) >> 16); | |
897 | return hash; | |
898 | } | |
899 | ||
900 | int HashIndex::get_path_contents_by_hash_bitwise( | |
901 | const vector<string> &path, | |
902 | const ghobject_t *next_object, | |
903 | set<string, CmpHexdigitStringBitwise> *hash_prefixes, | |
904 | set<pair<string, ghobject_t>, CmpPairBitwise> *objects) | |
905 | { | |
906 | map<string, ghobject_t> rev_objects; | |
907 | int r; | |
908 | r = list_objects(path, 0, 0, &rev_objects); | |
909 | if (r < 0) | |
910 | return r; | |
911 | // bitwise sort | |
912 | for (map<string, ghobject_t>::iterator i = rev_objects.begin(); | |
913 | i != rev_objects.end(); | |
914 | ++i) { | |
915 | if (next_object && i->second < *next_object) | |
916 | continue; | |
917 | string hash_prefix = get_path_str(i->second); | |
918 | hash_prefixes->insert(hash_prefix); | |
919 | objects->insert(pair<string, ghobject_t>(hash_prefix, i->second)); | |
920 | } | |
921 | vector<string> subdirs; | |
922 | r = list_subdirs(path, &subdirs); | |
923 | if (r < 0) | |
924 | return r; | |
925 | ||
926 | // sort subdirs bitwise (by reversing hex digit nibbles) | |
927 | std::sort(subdirs.begin(), subdirs.end(), cmp_hexdigit_bitwise); | |
928 | ||
929 | // Local to this function, we will convert the prefix strings | |
930 | // (previously simply the reversed hex digits) to also have each | |
931 | // digit's nibbles reversed. This will make the strings sort | |
932 | // bitwise. | |
933 | string cur_prefix; | |
934 | for (vector<string>::const_iterator i = path.begin(); | |
935 | i != path.end(); | |
936 | ++i) { | |
937 | cur_prefix.append(reverse_hexdigit_bits_string(*i)); | |
938 | } | |
939 | string next_object_string; | |
940 | if (next_object) | |
941 | next_object_string = reverse_hexdigit_bits_string(get_path_str(*next_object)); | |
942 | for (vector<string>::iterator i = subdirs.begin(); | |
943 | i != subdirs.end(); | |
944 | ++i) { | |
945 | string candidate = cur_prefix + reverse_hexdigit_bits_string(*i); | |
946 | if (next_object) { | |
947 | if (next_object->is_max()) | |
948 | continue; | |
949 | if (candidate < next_object_string.substr(0, candidate.size())) | |
950 | continue; | |
951 | } | |
952 | // re-reverse the hex digit nibbles for the caller | |
953 | hash_prefixes->insert(reverse_hexdigit_bits_string(candidate)); | |
954 | } | |
955 | return 0; | |
956 | } | |
957 | ||
958 | int HashIndex::list_by_hash(const vector<string> &path, | |
959 | const ghobject_t &end, | |
960 | int max_count, | |
961 | ghobject_t *next, | |
962 | vector<ghobject_t> *out) | |
963 | { | |
964 | assert(out); | |
965 | return list_by_hash_bitwise(path, end, max_count, next, out); | |
966 | } | |
967 | ||
968 | int HashIndex::list_by_hash_bitwise( | |
969 | const vector<string> &path, | |
970 | const ghobject_t& end, | |
971 | int max_count, | |
972 | ghobject_t *next, | |
973 | vector<ghobject_t> *out) | |
974 | { | |
975 | vector<string> next_path = path; | |
976 | next_path.push_back(""); | |
977 | set<string, CmpHexdigitStringBitwise> hash_prefixes; | |
978 | set<pair<string, ghobject_t>, CmpPairBitwise> objects; | |
979 | int r = get_path_contents_by_hash_bitwise(path, | |
980 | next, | |
981 | &hash_prefixes, | |
982 | &objects); | |
983 | if (r < 0) | |
984 | return r; | |
985 | for (set<string, CmpHexdigitStringBitwise>::iterator i = hash_prefixes.begin(); | |
986 | i != hash_prefixes.end(); | |
987 | ++i) { | |
988 | dout(20) << __func__ << " prefix " << *i << dendl; | |
989 | set<pair<string, ghobject_t>, CmpPairBitwise>::iterator j = objects.lower_bound( | |
990 | make_pair(*i, ghobject_t())); | |
991 | if (j == objects.end() || j->first != *i) { | |
992 | *(next_path.rbegin()) = *(i->rbegin()); | |
993 | ghobject_t next_recurse; | |
994 | if (next) | |
995 | next_recurse = *next; | |
996 | r = list_by_hash_bitwise(next_path, | |
997 | end, | |
998 | max_count, | |
999 | &next_recurse, | |
1000 | out); | |
1001 | ||
1002 | if (r < 0) | |
1003 | return r; | |
1004 | if (!next_recurse.is_max()) { | |
1005 | if (next) | |
1006 | *next = next_recurse; | |
1007 | return 0; | |
1008 | } | |
1009 | } else { | |
1010 | while (j != objects.end() && j->first == *i) { | |
1011 | if (max_count > 0 && out->size() == (unsigned)max_count) { | |
1012 | if (next) | |
1013 | *next = j->second; | |
1014 | return 0; | |
1015 | } | |
1016 | if (j->second >= end) { | |
1017 | if (next) | |
1018 | *next = j->second; | |
1019 | return 0; | |
1020 | } | |
1021 | if (!next || j->second >= *next) { | |
1022 | dout(20) << __func__ << " prefix " << *i << " ob " << j->second << dendl; | |
1023 | out->push_back(j->second); | |
1024 | } | |
1025 | ++j; | |
1026 | } | |
1027 | } | |
1028 | } | |
1029 | if (next) | |
1030 | *next = ghobject_t::get_max(); | |
1031 | return 0; | |
1032 | } | |
1033 | ||
1034 |