]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/filestore/HashIndex.cc
update sources to v12.1.1
[ceph.git] / ceph / src / os / filestore / HashIndex.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "include/types.h"
16 #include "include/buffer.h"
17 #include "osd/osd_types.h"
18 #include <errno.h>
19
20 #include "HashIndex.h"
21
22 #include "common/errno.h"
23 #include "common/debug.h"
24 #define dout_context cct
25 #define dout_subsys ceph_subsys_filestore
26
27 const string HashIndex::SUBDIR_ATTR = "contents";
28 const string HashIndex::SETTINGS_ATTR = "settings";
29 const string HashIndex::IN_PROGRESS_OP_TAG = "in_progress_op";
30
31 /// hex digit to integer value
32 int hex_to_int(char c)
33 {
34 if (c >= '0' && c <= '9')
35 return c - '0';
36 if (c >= 'A' && c <= 'F')
37 return c - 'A' + 10;
38 ceph_abort();
39 }
40
41 /// int value to hex digit
42 char int_to_hex(int v)
43 {
44 assert(v < 16);
45 if (v < 10)
46 return '0' + v;
47 return 'A' + v - 10;
48 }
49
50 /// reverse bits in a nibble (0..15)
51 int reverse_nibble_bits(int in)
52 {
53 assert(in < 16);
54 return
55 ((in & 8) >> 3) |
56 ((in & 4) >> 1) |
57 ((in & 2) << 1) |
58 ((in & 1) << 3);
59 }
60
61 /// reverse nibble bits in a hex digit
62 char reverse_hexdigit_bits(char c)
63 {
64 return int_to_hex(reverse_nibble_bits(hex_to_int(c)));
65 }
66
67 /// reverse nibble bits in a hex string
68 string reverse_hexdigit_bits_string(string s)
69 {
70 for (unsigned i=0; i<s.size(); ++i)
71 s[i] = reverse_hexdigit_bits(s[i]);
72 return s;
73 }
74
75 /// compare hex digit (as length 1 string) bitwise
76 bool cmp_hexdigit_bitwise(const string& l, const string& r)
77 {
78 assert(l.length() == 1 && r.length() == 1);
79 int lv = hex_to_int(l[0]);
80 int rv = hex_to_int(r[0]);
81 assert(lv < 16);
82 assert(rv < 16);
83 return reverse_nibble_bits(lv) < reverse_nibble_bits(rv);
84 }
85
86 /// compare hex digit string bitwise
87 bool cmp_hexdigit_string_bitwise(const string& l, const string& r)
88 {
89 string ll = reverse_hexdigit_bits_string(l);
90 string rr = reverse_hexdigit_bits_string(r);
91 return ll < rr;
92 }
93
94 int HashIndex::cleanup() {
95 bufferlist bl;
96 int r = get_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
97 if (r < 0) {
98 // No in progress operations!
99 return 0;
100 }
101 bufferlist::iterator i = bl.begin();
102 InProgressOp in_progress(i);
103 subdir_info_s info;
104 r = get_info(in_progress.path, &info);
105 if (r == -ENOENT) {
106 return end_split_or_merge(in_progress.path);
107 } else if (r < 0) {
108 return r;
109 }
110
111 if (in_progress.is_split())
112 return complete_split(in_progress.path, info);
113 else if (in_progress.is_merge())
114 return complete_merge(in_progress.path, info);
115 else if (in_progress.is_col_split()) {
116 for (vector<string>::iterator i = in_progress.path.begin();
117 i != in_progress.path.end();
118 ++i) {
119 vector<string> path(in_progress.path.begin(), i);
120 int r = reset_attr(path);
121 if (r < 0)
122 return r;
123 }
124 return 0;
125 }
126 else
127 return -EINVAL;
128 }
129
130 int HashIndex::reset_attr(
131 const vector<string> &path)
132 {
133 int exists = 0;
134 int r = path_exists(path, &exists);
135 if (r < 0)
136 return r;
137 if (!exists)
138 return 0;
139 map<string, ghobject_t> objects;
140 vector<string> subdirs;
141 r = list_objects(path, 0, 0, &objects);
142 if (r < 0)
143 return r;
144 r = list_subdirs(path, &subdirs);
145 if (r < 0)
146 return r;
147
148 subdir_info_s info;
149 info.hash_level = path.size();
150 info.objs = objects.size();
151 info.subdirs = subdirs.size();
152 return set_info(path, info);
153 }
154
155 int HashIndex::col_split_level(
156 HashIndex &from,
157 HashIndex &to,
158 const vector<string> &path,
159 uint32_t inbits,
160 uint32_t match,
161 unsigned *mkdirred)
162 {
163 /* For each subdir, move, recurse, or ignore based on comparing the low order
164 * bits of the hash represented by the subdir path with inbits, match passed
165 * in.
166 */
167 vector<string> subdirs;
168 int r = from.list_subdirs(path, &subdirs);
169 if (r < 0)
170 return r;
171 map<string, ghobject_t> objects;
172 r = from.list_objects(path, 0, 0, &objects);
173 if (r < 0)
174 return r;
175
176 set<string> to_move;
177 for (vector<string>::iterator i = subdirs.begin();
178 i != subdirs.end();
179 ++i) {
180 uint32_t bits = 0;
181 uint32_t hash = 0;
182 vector<string> sub_path(path.begin(), path.end());
183 sub_path.push_back(*i);
184 path_to_hobject_hash_prefix(sub_path, &bits, &hash);
185 if (bits < inbits) {
186 if (hobject_t::match_hash(hash, bits, match)) {
187 r = col_split_level(
188 from,
189 to,
190 sub_path,
191 inbits,
192 match,
193 mkdirred);
194 if (r < 0)
195 return r;
196 if (*mkdirred > path.size())
197 *mkdirred = path.size();
198 } // else, skip, doesn't need to be moved or recursed into
199 } else {
200 if (hobject_t::match_hash(hash, inbits, match)) {
201 to_move.insert(*i);
202 }
203 } // else, skip, doesn't need to be moved or recursed into
204 }
205
206 /* Then, do the same for each object */
207 map<string, ghobject_t> objs_to_move;
208 for (map<string, ghobject_t>::iterator i = objects.begin();
209 i != objects.end();
210 ++i) {
211 if (i->second.match(inbits, match)) {
212 objs_to_move.insert(*i);
213 }
214 }
215
216 if (objs_to_move.empty() && to_move.empty())
217 return 0;
218
219 // Make parent directories as needed
220 while (*mkdirred < path.size()) {
221 ++*mkdirred;
222 int exists = 0;
223 vector<string> creating_path(path.begin(), path.begin()+*mkdirred);
224 r = to.path_exists(creating_path, &exists);
225 if (r < 0)
226 return r;
227 if (exists)
228 continue;
229 subdir_info_s info;
230 info.objs = 0;
231 info.subdirs = 0;
232 info.hash_level = creating_path.size();
233 if (*mkdirred < path.size() - 1)
234 info.subdirs = 1;
235 r = to.start_col_split(creating_path);
236 if (r < 0)
237 return r;
238 r = to.create_path(creating_path);
239 if (r < 0)
240 return r;
241 r = to.set_info(creating_path, info);
242 if (r < 0)
243 return r;
244 r = to.end_split_or_merge(creating_path);
245 if (r < 0)
246 return r;
247 }
248
249 subdir_info_s from_info;
250 subdir_info_s to_info;
251 r = from.get_info(path, &from_info);
252 if (r < 0)
253 return r;
254 r = to.get_info(path, &to_info);
255 if (r < 0)
256 return r;
257
258 from.start_col_split(path);
259 to.start_col_split(path);
260
261 // Do subdir moves
262 for (set<string>::iterator i = to_move.begin();
263 i != to_move.end();
264 ++i) {
265 from_info.subdirs--;
266 to_info.subdirs++;
267 r = move_subdir(from, to, path, *i);
268 if (r < 0)
269 return r;
270 }
271
272 for (map<string, ghobject_t>::iterator i = objs_to_move.begin();
273 i != objs_to_move.end();
274 ++i) {
275 from_info.objs--;
276 to_info.objs++;
277 r = move_object(from, to, path, *i);
278 if (r < 0)
279 return r;
280 }
281
282
283 r = to.set_info(path, to_info);
284 if (r < 0)
285 return r;
286 r = from.set_info(path, from_info);
287 if (r < 0)
288 return r;
289 from.end_split_or_merge(path);
290 to.end_split_or_merge(path);
291 return 0;
292 }
293
294 int HashIndex::_split(
295 uint32_t match,
296 uint32_t bits,
297 CollectionIndex* dest) {
298 assert(collection_version() == dest->collection_version());
299 unsigned mkdirred = 0;
300 return col_split_level(
301 *this,
302 *static_cast<HashIndex*>(dest),
303 vector<string>(),
304 bits,
305 match,
306 &mkdirred);
307 }
308
309 int HashIndex::split_dirs(const vector<string> &path) {
310 dout(20) << __func__ << " " << path << dendl;
311 subdir_info_s info;
312 int r = get_info(path, &info);
313 if (r < 0) {
314 dout(10) << "error looking up info for " << path << ": "
315 << cpp_strerror(r) << dendl;
316 return r;
317 }
318
319 if (must_split(info)) {
320 dout(1) << __func__ << " " << path << " has " << info.objs
321 << " objects, starting split." << dendl;
322 r = initiate_split(path, info);
323 if (r < 0) {
324 dout(10) << "error initiating split on " << path << ": "
325 << cpp_strerror(r) << dendl;
326 return r;
327 }
328
329 r = complete_split(path, info);
330 dout(1) << __func__ << " " << path << " split completed."
331 << dendl;
332 if (r < 0) {
333 dout(10) << "error completing split on " << path << ": "
334 << cpp_strerror(r) << dendl;
335 return r;
336 }
337 }
338
339 vector<string> subdirs;
340 r = list_subdirs(path, &subdirs);
341 if (r < 0) {
342 dout(10) << "error listing subdirs of " << path << ": "
343 << cpp_strerror(r) << dendl;
344 return r;
345 }
346 for (vector<string>::const_iterator it = subdirs.begin();
347 it != subdirs.end(); ++it) {
348 vector<string> subdir_path(path);
349 subdir_path.push_back(*it);
350 r = split_dirs(subdir_path);
351 if (r < 0) {
352 return r;
353 }
354 }
355
356 return r;
357 }
358
359 int HashIndex::apply_layout_settings() {
360 vector<string> path;
361 dout(10) << __func__ << " split multiple = " << split_multiplier
362 << " merge threshold = " << merge_threshold
363 << " split rand factor = " << cct->_conf->filestore_split_rand_factor
364 << dendl;
365 int r = write_settings();
366 if (r < 0)
367 return r;
368 return split_dirs(path);
369 }
370
371 int HashIndex::_init() {
372 subdir_info_s info;
373 vector<string> path;
374 int r = set_info(path, info);
375 if (r < 0)
376 return r;
377 return write_settings();
378 }
379
380 int HashIndex::write_settings() {
381 if (cct->_conf->filestore_split_rand_factor > 0) {
382 settings.split_rand_factor = rand() % cct->_conf->filestore_split_rand_factor;
383 } else {
384 settings.split_rand_factor = 0;
385 }
386 vector<string> path;
387 bufferlist bl;
388 settings.encode(bl);
389 return add_attr_path(path, SETTINGS_ATTR, bl);
390 }
391
392 int HashIndex::read_settings() {
393 vector<string> path;
394 bufferlist bl;
395 int r = get_attr_path(path, SETTINGS_ATTR, bl);
396 if (r == -ENODATA)
397 return 0;
398 if (r < 0) {
399 derr << __func__ << " error reading settings: " << cpp_strerror(r) << dendl;
400 return r;
401 }
402 bufferlist::iterator it = bl.begin();
403 settings.decode(it);
404 dout(20) << __func__ << " split_rand_factor = " << settings.split_rand_factor << dendl;
405 return 0;
406 }
407
408 /* LFNIndex virtual method implementations */
409 int HashIndex::_created(const vector<string> &path,
410 const ghobject_t &oid,
411 const string &mangled_name) {
412 subdir_info_s info;
413 int r;
414 r = get_info(path, &info);
415 if (r < 0)
416 return r;
417 info.objs++;
418 r = set_info(path, info);
419 if (r < 0)
420 return r;
421
422 if (must_split(info)) {
423 dout(1) << __func__ << " " << path << " has " << info.objs
424 << " objects, starting split." << dendl;
425 int r = initiate_split(path, info);
426 if (r < 0)
427 return r;
428 r = complete_split(path, info);
429 dout(1) << __func__ << " " << path << " split completed."
430 << dendl;
431 return r;
432 } else {
433 return 0;
434 }
435 }
436
437 int HashIndex::_remove(const vector<string> &path,
438 const ghobject_t &oid,
439 const string &mangled_name) {
440 int r;
441 r = remove_object(path, oid);
442 if (r < 0)
443 return r;
444 subdir_info_s info;
445 r = get_info(path, &info);
446 if (r < 0)
447 return r;
448 info.objs--;
449 r = set_info(path, info);
450 if (r < 0)
451 return r;
452 if (must_merge(info)) {
453 r = initiate_merge(path, info);
454 if (r < 0)
455 return r;
456 return complete_merge(path, info);
457 } else {
458 return 0;
459 }
460 }
461
462 int HashIndex::_lookup(const ghobject_t &oid,
463 vector<string> *path,
464 string *mangled_name,
465 int *hardlink) {
466 vector<string> path_comp;
467 get_path_components(oid, &path_comp);
468 vector<string>::iterator next = path_comp.begin();
469 int exists;
470 while (1) {
471 int r = path_exists(*path, &exists);
472 if (r < 0)
473 return r;
474 if (!exists) {
475 if (path->empty())
476 return -ENOENT;
477 path->pop_back();
478 break;
479 }
480 if (next == path_comp.end())
481 break;
482 path->push_back(*(next++));
483 }
484 return get_mangled_name(*path, oid, mangled_name, hardlink);
485 }
486
487 int HashIndex::_collection_list_partial(const ghobject_t &start,
488 const ghobject_t &end,
489 int max_count,
490 vector<ghobject_t> *ls,
491 ghobject_t *next) {
492 vector<string> path;
493 ghobject_t _next;
494 if (!next)
495 next = &_next;
496 *next = start;
497 dout(20) << __func__ << " start:" << start << " end:" << end << "-" << max_count << " ls.size " << ls->size() << dendl;
498 return list_by_hash(path, end, max_count, next, ls);
499 }
500
501 int HashIndex::prep_delete() {
502 return recursive_remove(vector<string>());
503 }
504
505 int HashIndex::_pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs) {
506 int ret;
507 vector<string> path;
508 subdir_info_s root_info;
509 // Make sure there is neither objects nor sub-folders
510 // in this collection
511 ret = get_info(path, &root_info);
512 if (ret < 0)
513 return ret;
514
515 // Do the folder splitting first
516 ret = pre_split_folder(pg_num, expected_num_objs);
517 if (ret < 0)
518 return ret;
519 // Initialize the folder info starting from root
520 return init_split_folder(path, 0);
521 }
522
523 int HashIndex::pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs)
524 {
525 // If folder merging is enabled (by setting the threshold positive),
526 // no need to split
527 if (merge_threshold > 0)
528 return 0;
529 const coll_t c = coll();
530 // Do not split if the expected number of objects in this collection is zero (by default)
531 if (expected_num_objs == 0)
532 return 0;
533
534 // Calculate the number of leaf folders (which actually store files)
535 // need to be created
536 const uint64_t objs_per_folder = ((uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier + settings.split_rand_factor) * 16;
537 uint64_t leavies = expected_num_objs / objs_per_folder ;
538 // No need to split
539 if (leavies == 0 || expected_num_objs == objs_per_folder)
540 return 0;
541
542 spg_t spgid;
543 if (!c.is_pg_prefix(&spgid))
544 return -EINVAL;
545 const ps_t ps = spgid.pgid.ps();
546
547 // the most significant bits of pg_num
548 const int pg_num_bits = calc_num_bits(pg_num - 1);
549 ps_t tmp_id = ps;
550 // calculate the number of levels we only create one sub folder
551 int num = pg_num_bits / 4;
552 // pg num's hex value is like 1xxx,xxxx,xxxx but not 1111,1111,1111,
553 // so that splitting starts at level 3
554 if (pg_num_bits % 4 == 0 && pg_num < ((uint32_t)1 << pg_num_bits)) {
555 --num;
556 }
557
558 int ret;
559 // Start with creation that only has one subfolder
560 vector<string> paths;
561 int dump_num = num;
562 while (num-- > 0) {
563 ps_t v = tmp_id & 0x0000000f;
564 paths.push_back(to_hex(v));
565 ret = create_path(paths);
566 if (ret < 0 && ret != -EEXIST)
567 return ret;
568 tmp_id = tmp_id >> 4;
569 }
570
571 // Starting from here, we can split by creating multiple subfolders
572 const int left_bits = pg_num_bits - dump_num * 4;
573 // this variable denotes how many bits (for this level) that can be
574 // used for sub folder splitting
575 int split_bits = 4 - left_bits;
576 // the below logic is inspired by rados.h#ceph_stable_mod,
577 // it basically determines how many sub-folders should we
578 // create for splitting
579 assert(pg_num_bits > 0); // otherwise BAD_SHIFT
580 if (((1 << (pg_num_bits - 1)) | ps) >= pg_num) {
581 ++split_bits;
582 }
583 const uint32_t subs = (1 << split_bits);
584 // Calculate how many levels we create starting from here
585 int level = 0;
586 leavies /= subs;
587 while (leavies > 1) {
588 ++level;
589 leavies = leavies >> 4;
590 }
591 for (uint32_t i = 0; i < subs; ++i) {
592 assert(split_bits <= 4); // otherwise BAD_SHIFT
593 int v = tmp_id | (i << ((4 - split_bits) % 4));
594 paths.push_back(to_hex(v));
595 ret = create_path(paths);
596 if (ret < 0 && ret != -EEXIST)
597 return ret;
598 ret = recursive_create_path(paths, level);
599 if (ret < 0)
600 return ret;
601 paths.pop_back();
602 }
603 return 0;
604 }
605
606 int HashIndex::init_split_folder(vector<string> &path, uint32_t hash_level)
607 {
608 // Get the number of sub directories for the current path
609 vector<string> subdirs;
610 int ret = list_subdirs(path, &subdirs);
611 if (ret < 0)
612 return ret;
613 subdir_info_s info;
614 info.subdirs = subdirs.size();
615 info.hash_level = hash_level;
616 ret = set_info(path, info);
617 if (ret < 0)
618 return ret;
619 ret = fsync_dir(path);
620 if (ret < 0)
621 return ret;
622
623 // Do the same for subdirs
624 vector<string>::const_iterator iter;
625 for (iter = subdirs.begin(); iter != subdirs.end(); ++iter) {
626 path.push_back(*iter);
627 ret = init_split_folder(path, hash_level + 1);
628 if (ret < 0)
629 return ret;
630 path.pop_back();
631 }
632 return 0;
633 }
634
635 int HashIndex::recursive_create_path(vector<string>& path, int level)
636 {
637 if (level == 0)
638 return 0;
639 for (int i = 0; i < 16; ++i) {
640 path.push_back(to_hex(i));
641 int ret = create_path(path);
642 if (ret < 0 && ret != -EEXIST)
643 return ret;
644 ret = recursive_create_path(path, level - 1);
645 if (ret < 0)
646 return ret;
647 path.pop_back();
648 }
649 return 0;
650 }
651
652 int HashIndex::recursive_remove(const vector<string> &path) {
653 return _recursive_remove(path, true);
654 }
655
656 int HashIndex::_recursive_remove(const vector<string> &path, bool top) {
657 vector<string> subdirs;
658 dout(20) << __func__ << " path=" << path << dendl;
659 int r = list_subdirs(path, &subdirs);
660 if (r < 0)
661 return r;
662 map<string, ghobject_t> objects;
663 r = list_objects(path, 0, 0, &objects);
664 if (r < 0)
665 return r;
666 if (!objects.empty())
667 return -ENOTEMPTY;
668 vector<string> subdir(path);
669 for (vector<string>::iterator i = subdirs.begin();
670 i != subdirs.end();
671 ++i) {
672 subdir.push_back(*i);
673 r = _recursive_remove(subdir, false);
674 if (r < 0)
675 return r;
676 subdir.pop_back();
677 }
678 if (top)
679 return 0;
680 else
681 return remove_path(path);
682 }
683
684 int HashIndex::start_col_split(const vector<string> &path) {
685 bufferlist bl;
686 InProgressOp op_tag(InProgressOp::COL_SPLIT, path);
687 op_tag.encode(bl);
688 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
689 if (r < 0)
690 return r;
691 return fsync_dir(vector<string>());
692 }
693
694 int HashIndex::start_split(const vector<string> &path) {
695 bufferlist bl;
696 InProgressOp op_tag(InProgressOp::SPLIT, path);
697 op_tag.encode(bl);
698 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
699 if (r < 0)
700 return r;
701 return fsync_dir(vector<string>());
702 }
703
704 int HashIndex::start_merge(const vector<string> &path) {
705 bufferlist bl;
706 InProgressOp op_tag(InProgressOp::MERGE, path);
707 op_tag.encode(bl);
708 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
709 if (r < 0)
710 return r;
711 return fsync_dir(vector<string>());
712 }
713
714 int HashIndex::end_split_or_merge(const vector<string> &path) {
715 return remove_attr_path(vector<string>(), IN_PROGRESS_OP_TAG);
716 }
717
718 int HashIndex::get_info(const vector<string> &path, subdir_info_s *info) {
719 bufferlist buf;
720 int r = get_attr_path(path, SUBDIR_ATTR, buf);
721 if (r < 0)
722 return r;
723 bufferlist::iterator bufiter = buf.begin();
724 info->decode(bufiter);
725 assert(path.size() == (unsigned)info->hash_level);
726 return 0;
727 }
728
729 int HashIndex::set_info(const vector<string> &path, const subdir_info_s &info) {
730 bufferlist buf;
731 assert(path.size() == (unsigned)info.hash_level);
732 info.encode(buf);
733 return add_attr_path(path, SUBDIR_ATTR, buf);
734 }
735
736 bool HashIndex::must_merge(const subdir_info_s &info) {
737 return (info.hash_level > 0 &&
738 merge_threshold > 0 &&
739 info.objs < (unsigned)merge_threshold &&
740 info.subdirs == 0);
741 }
742
743 bool HashIndex::must_split(const subdir_info_s &info) {
744 return (info.hash_level < (unsigned)MAX_HASH_LEVEL &&
745 info.objs > ((unsigned)(abs(merge_threshold) * split_multiplier + settings.split_rand_factor) * 16));
746
747 }
748
749 int HashIndex::initiate_merge(const vector<string> &path, subdir_info_s info) {
750 return start_merge(path);
751 }
752
753 int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) {
754 vector<string> dst = path;
755 dst.pop_back();
756 subdir_info_s dstinfo;
757 int r, exists;
758 r = path_exists(path, &exists);
759 if (r < 0)
760 return r;
761 r = get_info(dst, &dstinfo);
762 if (r < 0)
763 return r;
764 if (exists) {
765 r = move_objects(path, dst);
766 if (r < 0)
767 return r;
768 r = reset_attr(dst);
769 if (r < 0)
770 return r;
771 r = remove_path(path);
772 if (r < 0)
773 return r;
774 }
775 if (must_merge(dstinfo)) {
776 r = initiate_merge(dst, dstinfo);
777 if (r < 0)
778 return r;
779 r = fsync_dir(dst);
780 if (r < 0)
781 return r;
782 return complete_merge(dst, dstinfo);
783 }
784 r = fsync_dir(dst);
785 if (r < 0)
786 return r;
787 return end_split_or_merge(path);
788 }
789
790 int HashIndex::initiate_split(const vector<string> &path, subdir_info_s info) {
791 return start_split(path);
792 }
793
794 int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) {
795 int level = info.hash_level;
796 map<string, ghobject_t> objects;
797 vector<string> dst = path;
798 int r;
799 dst.push_back("");
800 r = list_objects(path, 0, 0, &objects);
801 if (r < 0)
802 return r;
803 vector<string> subdirs_vec;
804 r = list_subdirs(path, &subdirs_vec);
805 if (r < 0)
806 return r;
807 set<string> subdirs;
808 subdirs.insert(subdirs_vec.begin(), subdirs_vec.end());
809 map<string, map<string, ghobject_t> > mapped;
810 map<string, ghobject_t> moved;
811 int num_moved = 0;
812 for (map<string, ghobject_t>::iterator i = objects.begin();
813 i != objects.end();
814 ++i) {
815 vector<string> new_path;
816 get_path_components(i->second, &new_path);
817 mapped[new_path[level]][i->first] = i->second;
818 }
819 for (map<string, map<string, ghobject_t> >::iterator i = mapped.begin();
820 i != mapped.end();
821 ) {
822 dst[level] = i->first;
823 /* If the info already exists, it must be correct,
824 * we may be picking up a partially finished split */
825 subdir_info_s temp;
826 // subdir has already been fully copied
827 if (subdirs.count(i->first) && !get_info(dst, &temp)) {
828 for (map<string, ghobject_t>::iterator j = i->second.begin();
829 j != i->second.end();
830 ++j) {
831 moved[j->first] = j->second;
832 num_moved++;
833 objects.erase(j->first);
834 }
835 ++i;
836 continue;
837 }
838
839 subdir_info_s info_new;
840 info_new.objs = i->second.size();
841 info_new.subdirs = 0;
842 info_new.hash_level = level + 1;
843 if (must_merge(info_new) && !subdirs.count(i->first)) {
844 mapped.erase(i++);
845 continue;
846 }
847
848 // Subdir doesn't yet exist
849 if (!subdirs.count(i->first)) {
850 info.subdirs += 1;
851 r = create_path(dst);
852 if (r < 0)
853 return r;
854 } // else subdir has been created but only partially copied
855
856 for (map<string, ghobject_t>::iterator j = i->second.begin();
857 j != i->second.end();
858 ++j) {
859 moved[j->first] = j->second;
860 num_moved++;
861 objects.erase(j->first);
862 r = link_object(path, dst, j->second, j->first);
863 // May be a partially finished split
864 if (r < 0 && r != -EEXIST) {
865 return r;
866 }
867 }
868
869 r = fsync_dir(dst);
870 if (r < 0)
871 return r;
872
873 // Presence of info must imply that all objects have been copied
874 r = set_info(dst, info_new);
875 if (r < 0)
876 return r;
877
878 r = fsync_dir(dst);
879 if (r < 0)
880 return r;
881
882 ++i;
883 }
884 r = remove_objects(path, moved, &objects);
885 if (r < 0)
886 return r;
887 info.objs = objects.size();
888 r = reset_attr(path);
889 if (r < 0)
890 return r;
891 r = fsync_dir(path);
892 if (r < 0)
893 return r;
894 return end_split_or_merge(path);
895 }
896
897 void HashIndex::get_path_components(const ghobject_t &oid,
898 vector<string> *path) {
899 char buf[MAX_HASH_LEVEL + 1];
900 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)oid.hobj.get_nibblewise_key());
901
902 // Path components are the hex characters of oid.hobj.hash, least
903 // significant first
904 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
905 path->push_back(string(&buf[i], 1));
906 }
907 }
908
909 string HashIndex::get_hash_str(uint32_t hash) {
910 char buf[MAX_HASH_LEVEL + 1];
911 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hash);
912 string retval;
913 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
914 retval.push_back(buf[MAX_HASH_LEVEL - 1 - i]);
915 }
916 return retval;
917 }
918
919 string HashIndex::get_path_str(const ghobject_t &oid) {
920 assert(!oid.is_max());
921 return get_hash_str(oid.hobj.get_hash());
922 }
923
924 uint32_t HashIndex::hash_prefix_to_hash(string prefix) {
925 while (prefix.size() < sizeof(uint32_t) * 2) {
926 prefix.push_back('0');
927 }
928 uint32_t hash;
929 sscanf(prefix.c_str(), "%x", &hash);
930 // nibble reverse
931 hash = ((hash & 0x0f0f0f0f) << 4) | ((hash & 0xf0f0f0f0) >> 4);
932 hash = ((hash & 0x00ff00ff) << 8) | ((hash & 0xff00ff00) >> 8);
933 hash = ((hash & 0x0000ffff) << 16) | ((hash & 0xffff0000) >> 16);
934 return hash;
935 }
936
937 int HashIndex::get_path_contents_by_hash_bitwise(
938 const vector<string> &path,
939 const ghobject_t *next_object,
940 set<string, CmpHexdigitStringBitwise> *hash_prefixes,
941 set<pair<string, ghobject_t>, CmpPairBitwise> *objects)
942 {
943 map<string, ghobject_t> rev_objects;
944 int r;
945 r = list_objects(path, 0, 0, &rev_objects);
946 if (r < 0)
947 return r;
948 // bitwise sort
949 for (map<string, ghobject_t>::iterator i = rev_objects.begin();
950 i != rev_objects.end();
951 ++i) {
952 if (next_object && i->second < *next_object)
953 continue;
954 string hash_prefix = get_path_str(i->second);
955 hash_prefixes->insert(hash_prefix);
956 objects->insert(pair<string, ghobject_t>(hash_prefix, i->second));
957 }
958 vector<string> subdirs;
959 r = list_subdirs(path, &subdirs);
960 if (r < 0)
961 return r;
962
963 // sort subdirs bitwise (by reversing hex digit nibbles)
964 std::sort(subdirs.begin(), subdirs.end(), cmp_hexdigit_bitwise);
965
966 // Local to this function, we will convert the prefix strings
967 // (previously simply the reversed hex digits) to also have each
968 // digit's nibbles reversed. This will make the strings sort
969 // bitwise.
970 string cur_prefix;
971 for (vector<string>::const_iterator i = path.begin();
972 i != path.end();
973 ++i) {
974 cur_prefix.append(reverse_hexdigit_bits_string(*i));
975 }
976 string next_object_string;
977 if (next_object)
978 next_object_string = reverse_hexdigit_bits_string(get_path_str(*next_object));
979 for (vector<string>::iterator i = subdirs.begin();
980 i != subdirs.end();
981 ++i) {
982 string candidate = cur_prefix + reverse_hexdigit_bits_string(*i);
983 if (next_object) {
984 if (next_object->is_max())
985 continue;
986 if (candidate < next_object_string.substr(0, candidate.size()))
987 continue;
988 }
989 // re-reverse the hex digit nibbles for the caller
990 hash_prefixes->insert(reverse_hexdigit_bits_string(candidate));
991 }
992 return 0;
993 }
994
995 int HashIndex::list_by_hash(const vector<string> &path,
996 const ghobject_t &end,
997 int max_count,
998 ghobject_t *next,
999 vector<ghobject_t> *out)
1000 {
1001 assert(out);
1002 return list_by_hash_bitwise(path, end, max_count, next, out);
1003 }
1004
1005 int HashIndex::list_by_hash_bitwise(
1006 const vector<string> &path,
1007 const ghobject_t& end,
1008 int max_count,
1009 ghobject_t *next,
1010 vector<ghobject_t> *out)
1011 {
1012 vector<string> next_path = path;
1013 next_path.push_back("");
1014 set<string, CmpHexdigitStringBitwise> hash_prefixes;
1015 set<pair<string, ghobject_t>, CmpPairBitwise> objects;
1016 int r = get_path_contents_by_hash_bitwise(path,
1017 next,
1018 &hash_prefixes,
1019 &objects);
1020 if (r < 0)
1021 return r;
1022 for (set<string, CmpHexdigitStringBitwise>::iterator i = hash_prefixes.begin();
1023 i != hash_prefixes.end();
1024 ++i) {
1025 dout(20) << __func__ << " prefix " << *i << dendl;
1026 set<pair<string, ghobject_t>, CmpPairBitwise>::iterator j = objects.lower_bound(
1027 make_pair(*i, ghobject_t()));
1028 if (j == objects.end() || j->first != *i) {
1029 *(next_path.rbegin()) = *(i->rbegin());
1030 ghobject_t next_recurse;
1031 if (next)
1032 next_recurse = *next;
1033 r = list_by_hash_bitwise(next_path,
1034 end,
1035 max_count,
1036 &next_recurse,
1037 out);
1038
1039 if (r < 0)
1040 return r;
1041 if (!next_recurse.is_max()) {
1042 if (next)
1043 *next = next_recurse;
1044 return 0;
1045 }
1046 } else {
1047 while (j != objects.end() && j->first == *i) {
1048 if (max_count > 0 && out->size() == (unsigned)max_count) {
1049 if (next)
1050 *next = j->second;
1051 return 0;
1052 }
1053 if (j->second >= end) {
1054 if (next)
1055 *next = j->second;
1056 return 0;
1057 }
1058 if (!next || j->second >= *next) {
1059 dout(20) << __func__ << " prefix " << *i << " ob " << j->second << dendl;
1060 out->push_back(j->second);
1061 }
1062 ++j;
1063 }
1064 }
1065 }
1066 if (next)
1067 *next = ghobject_t::get_max();
1068 return 0;
1069 }
1070
1071