]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/filestore/HashIndex.cc
0ebec36b6eeb03b28d8cb79d795555679c518e1d
[ceph.git] / ceph / src / os / filestore / HashIndex.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "include/types.h"
16 #include "include/buffer.h"
17 #include "osd/osd_types.h"
18 #include <errno.h>
19
20 #include "HashIndex.h"
21
22 #include "common/errno.h"
23 #include "common/debug.h"
24 #define dout_context cct
25 #define dout_subsys ceph_subsys_filestore
26
27 const string HashIndex::SUBDIR_ATTR = "contents";
28 const string HashIndex::IN_PROGRESS_OP_TAG = "in_progress_op";
29
30 /// hex digit to integer value
31 int hex_to_int(char c)
32 {
33 if (c >= '0' && c <= '9')
34 return c - '0';
35 if (c >= 'A' && c <= 'F')
36 return c - 'A' + 10;
37 ceph_abort();
38 }
39
40 /// int value to hex digit
41 char int_to_hex(int v)
42 {
43 assert(v < 16);
44 if (v < 10)
45 return '0' + v;
46 return 'A' + v - 10;
47 }
48
49 /// reverse bits in a nibble (0..15)
50 int reverse_nibble_bits(int in)
51 {
52 assert(in < 16);
53 return
54 ((in & 8) >> 3) |
55 ((in & 4) >> 1) |
56 ((in & 2) << 1) |
57 ((in & 1) << 3);
58 }
59
60 /// reverse nibble bits in a hex digit
61 char reverse_hexdigit_bits(char c)
62 {
63 return int_to_hex(reverse_nibble_bits(hex_to_int(c)));
64 }
65
66 /// reverse nibble bits in a hex string
67 string reverse_hexdigit_bits_string(string s)
68 {
69 for (unsigned i=0; i<s.size(); ++i)
70 s[i] = reverse_hexdigit_bits(s[i]);
71 return s;
72 }
73
74 /// compare hex digit (as length 1 string) bitwise
75 bool cmp_hexdigit_bitwise(const string& l, const string& r)
76 {
77 assert(l.length() == 1 && r.length() == 1);
78 int lv = hex_to_int(l[0]);
79 int rv = hex_to_int(r[0]);
80 assert(lv < 16);
81 assert(rv < 16);
82 return reverse_nibble_bits(lv) < reverse_nibble_bits(rv);
83 }
84
85 /// compare hex digit string bitwise
86 bool cmp_hexdigit_string_bitwise(const string& l, const string& r)
87 {
88 string ll = reverse_hexdigit_bits_string(l);
89 string rr = reverse_hexdigit_bits_string(r);
90 return ll < rr;
91 }
92
93 int HashIndex::cleanup() {
94 bufferlist bl;
95 int r = get_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
96 if (r < 0) {
97 // No in progress operations!
98 return 0;
99 }
100 bufferlist::iterator i = bl.begin();
101 InProgressOp in_progress(i);
102 subdir_info_s info;
103 r = get_info(in_progress.path, &info);
104 if (r == -ENOENT) {
105 return end_split_or_merge(in_progress.path);
106 } else if (r < 0) {
107 return r;
108 }
109
110 if (in_progress.is_split())
111 return complete_split(in_progress.path, info);
112 else if (in_progress.is_merge())
113 return complete_merge(in_progress.path, info);
114 else if (in_progress.is_col_split()) {
115 for (vector<string>::iterator i = in_progress.path.begin();
116 i != in_progress.path.end();
117 ++i) {
118 vector<string> path(in_progress.path.begin(), i);
119 int r = reset_attr(path);
120 if (r < 0)
121 return r;
122 }
123 return 0;
124 }
125 else
126 return -EINVAL;
127 }
128
129 int HashIndex::reset_attr(
130 const vector<string> &path)
131 {
132 int exists = 0;
133 int r = path_exists(path, &exists);
134 if (r < 0)
135 return r;
136 if (!exists)
137 return 0;
138 map<string, ghobject_t> objects;
139 vector<string> subdirs;
140 r = list_objects(path, 0, 0, &objects);
141 if (r < 0)
142 return r;
143 r = list_subdirs(path, &subdirs);
144 if (r < 0)
145 return r;
146
147 subdir_info_s info;
148 info.hash_level = path.size();
149 info.objs = objects.size();
150 info.subdirs = subdirs.size();
151 return set_info(path, info);
152 }
153
154 int HashIndex::col_split_level(
155 HashIndex &from,
156 HashIndex &to,
157 const vector<string> &path,
158 uint32_t inbits,
159 uint32_t match,
160 unsigned *mkdirred)
161 {
162 /* For each subdir, move, recurse, or ignore based on comparing the low order
163 * bits of the hash represented by the subdir path with inbits, match passed
164 * in.
165 */
166 vector<string> subdirs;
167 int r = from.list_subdirs(path, &subdirs);
168 if (r < 0)
169 return r;
170 map<string, ghobject_t> objects;
171 r = from.list_objects(path, 0, 0, &objects);
172 if (r < 0)
173 return r;
174
175 set<string> to_move;
176 for (vector<string>::iterator i = subdirs.begin();
177 i != subdirs.end();
178 ++i) {
179 uint32_t bits = 0;
180 uint32_t hash = 0;
181 vector<string> sub_path(path.begin(), path.end());
182 sub_path.push_back(*i);
183 path_to_hobject_hash_prefix(sub_path, &bits, &hash);
184 if (bits < inbits) {
185 if (hobject_t::match_hash(hash, bits, match)) {
186 r = col_split_level(
187 from,
188 to,
189 sub_path,
190 inbits,
191 match,
192 mkdirred);
193 if (r < 0)
194 return r;
195 if (*mkdirred > path.size())
196 *mkdirred = path.size();
197 } // else, skip, doesn't need to be moved or recursed into
198 } else {
199 if (hobject_t::match_hash(hash, inbits, match)) {
200 to_move.insert(*i);
201 }
202 } // else, skip, doesn't need to be moved or recursed into
203 }
204
205 /* Then, do the same for each object */
206 map<string, ghobject_t> objs_to_move;
207 for (map<string, ghobject_t>::iterator i = objects.begin();
208 i != objects.end();
209 ++i) {
210 if (i->second.match(inbits, match)) {
211 objs_to_move.insert(*i);
212 }
213 }
214
215 if (objs_to_move.empty() && to_move.empty())
216 return 0;
217
218 // Make parent directories as needed
219 while (*mkdirred < path.size()) {
220 ++*mkdirred;
221 int exists = 0;
222 vector<string> creating_path(path.begin(), path.begin()+*mkdirred);
223 r = to.path_exists(creating_path, &exists);
224 if (r < 0)
225 return r;
226 if (exists)
227 continue;
228 subdir_info_s info;
229 info.objs = 0;
230 info.subdirs = 0;
231 info.hash_level = creating_path.size();
232 if (*mkdirred < path.size() - 1)
233 info.subdirs = 1;
234 r = to.start_col_split(creating_path);
235 if (r < 0)
236 return r;
237 r = to.create_path(creating_path);
238 if (r < 0)
239 return r;
240 r = to.set_info(creating_path, info);
241 if (r < 0)
242 return r;
243 r = to.end_split_or_merge(creating_path);
244 if (r < 0)
245 return r;
246 }
247
248 subdir_info_s from_info;
249 subdir_info_s to_info;
250 r = from.get_info(path, &from_info);
251 if (r < 0)
252 return r;
253 r = to.get_info(path, &to_info);
254 if (r < 0)
255 return r;
256
257 from.start_col_split(path);
258 to.start_col_split(path);
259
260 // Do subdir moves
261 for (set<string>::iterator i = to_move.begin();
262 i != to_move.end();
263 ++i) {
264 from_info.subdirs--;
265 to_info.subdirs++;
266 r = move_subdir(from, to, path, *i);
267 if (r < 0)
268 return r;
269 }
270
271 for (map<string, ghobject_t>::iterator i = objs_to_move.begin();
272 i != objs_to_move.end();
273 ++i) {
274 from_info.objs--;
275 to_info.objs++;
276 r = move_object(from, to, path, *i);
277 if (r < 0)
278 return r;
279 }
280
281
282 r = to.set_info(path, to_info);
283 if (r < 0)
284 return r;
285 r = from.set_info(path, from_info);
286 if (r < 0)
287 return r;
288 from.end_split_or_merge(path);
289 to.end_split_or_merge(path);
290 return 0;
291 }
292
293 int HashIndex::_split(
294 uint32_t match,
295 uint32_t bits,
296 CollectionIndex* dest) {
297 assert(collection_version() == dest->collection_version());
298 unsigned mkdirred = 0;
299 return col_split_level(
300 *this,
301 *static_cast<HashIndex*>(dest),
302 vector<string>(),
303 bits,
304 match,
305 &mkdirred);
306 }
307
308 int HashIndex::split_dirs(const vector<string> &path) {
309 dout(20) << __func__ << " " << path << dendl;
310 subdir_info_s info;
311 int r = get_info(path, &info);
312 if (r < 0) {
313 dout(10) << "error looking up info for " << path << ": "
314 << cpp_strerror(r) << dendl;
315 return r;
316 }
317
318 if (must_split(info)) {
319 dout(1) << __func__ << " " << path << " has " << info.objs
320 << " objects, starting split." << dendl;
321 r = initiate_split(path, info);
322 if (r < 0) {
323 dout(10) << "error initiating split on " << path << ": "
324 << cpp_strerror(r) << dendl;
325 return r;
326 }
327
328 r = complete_split(path, info);
329 dout(1) << __func__ << " " << path << " split completed."
330 << dendl;
331 if (r < 0) {
332 dout(10) << "error completing split on " << path << ": "
333 << cpp_strerror(r) << dendl;
334 return r;
335 }
336 }
337
338 vector<string> subdirs;
339 r = list_subdirs(path, &subdirs);
340 if (r < 0) {
341 dout(10) << "error listing subdirs of " << path << ": "
342 << cpp_strerror(r) << dendl;
343 return r;
344 }
345 for (vector<string>::const_iterator it = subdirs.begin();
346 it != subdirs.end(); ++it) {
347 vector<string> subdir_path(path);
348 subdir_path.push_back(*it);
349 r = split_dirs(subdir_path);
350 if (r < 0) {
351 return r;
352 }
353 }
354
355 return r;
356 }
357
358 int HashIndex::apply_layout_settings() {
359 vector<string> path;
360 dout(10) << __func__ << " split multiple = " << split_multiplier
361 << " merge threshold = " << merge_threshold << dendl;
362 return split_dirs(path);
363 }
364
365 int HashIndex::_init() {
366 subdir_info_s info;
367 vector<string> path;
368 return set_info(path, info);
369 }
370
371 /* LFNIndex virtual method implementations */
372 int HashIndex::_created(const vector<string> &path,
373 const ghobject_t &oid,
374 const string &mangled_name) {
375 subdir_info_s info;
376 int r;
377 r = get_info(path, &info);
378 if (r < 0)
379 return r;
380 info.objs++;
381 r = set_info(path, info);
382 if (r < 0)
383 return r;
384
385 if (must_split(info)) {
386 dout(1) << __func__ << " " << path << " has " << info.objs
387 << " objects, starting split." << dendl;
388 int r = initiate_split(path, info);
389 if (r < 0)
390 return r;
391 r = complete_split(path, info);
392 dout(1) << __func__ << " " << path << " split completed."
393 << dendl;
394 return r;
395 } else {
396 return 0;
397 }
398 }
399
400 int HashIndex::_remove(const vector<string> &path,
401 const ghobject_t &oid,
402 const string &mangled_name) {
403 int r;
404 r = remove_object(path, oid);
405 if (r < 0)
406 return r;
407 subdir_info_s info;
408 r = get_info(path, &info);
409 if (r < 0)
410 return r;
411 info.objs--;
412 r = set_info(path, info);
413 if (r < 0)
414 return r;
415 if (must_merge(info)) {
416 r = initiate_merge(path, info);
417 if (r < 0)
418 return r;
419 return complete_merge(path, info);
420 } else {
421 return 0;
422 }
423 }
424
425 int HashIndex::_lookup(const ghobject_t &oid,
426 vector<string> *path,
427 string *mangled_name,
428 int *hardlink) {
429 vector<string> path_comp;
430 get_path_components(oid, &path_comp);
431 vector<string>::iterator next = path_comp.begin();
432 int exists;
433 while (1) {
434 int r = path_exists(*path, &exists);
435 if (r < 0)
436 return r;
437 if (!exists) {
438 if (path->empty())
439 return -ENOENT;
440 path->pop_back();
441 break;
442 }
443 if (next == path_comp.end())
444 break;
445 path->push_back(*(next++));
446 }
447 return get_mangled_name(*path, oid, mangled_name, hardlink);
448 }
449
450 int HashIndex::_collection_list_partial(const ghobject_t &start,
451 const ghobject_t &end,
452 int max_count,
453 vector<ghobject_t> *ls,
454 ghobject_t *next) {
455 vector<string> path;
456 ghobject_t _next;
457 if (!next)
458 next = &_next;
459 *next = start;
460 dout(20) << __func__ << " start:" << start << " end:" << end << "-" << max_count << " ls.size " << ls->size() << dendl;
461 return list_by_hash(path, end, max_count, next, ls);
462 }
463
464 int HashIndex::prep_delete() {
465 return recursive_remove(vector<string>());
466 }
467
468 int HashIndex::_pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs) {
469 int ret;
470 vector<string> path;
471 subdir_info_s root_info;
472 // Make sure there is neither objects nor sub-folders
473 // in this collection
474 ret = get_info(path, &root_info);
475 if (ret < 0)
476 return ret;
477
478 // Do the folder splitting first
479 ret = pre_split_folder(pg_num, expected_num_objs);
480 if (ret < 0)
481 return ret;
482 // Initialize the folder info starting from root
483 return init_split_folder(path, 0);
484 }
485
486 int HashIndex::pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs)
487 {
488 // If folder merging is enabled (by setting the threshold positive),
489 // no need to split
490 if (merge_threshold > 0)
491 return 0;
492 const coll_t c = coll();
493 // Do not split if the expected number of objects in this collection is zero (by default)
494 if (expected_num_objs == 0)
495 return 0;
496
497 // Calculate the number of leaf folders (which actually store files)
498 // need to be created
499 const uint64_t objs_per_folder = (uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier * 16;
500 uint64_t leavies = expected_num_objs / objs_per_folder ;
501 // No need to split
502 if (leavies == 0 || expected_num_objs == objs_per_folder)
503 return 0;
504
505 spg_t spgid;
506 if (!c.is_pg_prefix(&spgid))
507 return -EINVAL;
508 const ps_t ps = spgid.pgid.ps();
509
510 // the most significant bits of pg_num
511 const int pg_num_bits = calc_num_bits(pg_num - 1);
512 ps_t tmp_id = ps;
513 // calculate the number of levels we only create one sub folder
514 int num = pg_num_bits / 4;
515 // pg num's hex value is like 1xxx,xxxx,xxxx but not 1111,1111,1111,
516 // so that splitting starts at level 3
517 if (pg_num_bits % 4 == 0 && pg_num < ((uint32_t)1 << pg_num_bits)) {
518 --num;
519 }
520
521 int ret;
522 // Start with creation that only has one subfolder
523 vector<string> paths;
524 int dump_num = num;
525 while (num-- > 0) {
526 ps_t v = tmp_id & 0x0000000f;
527 paths.push_back(to_hex(v));
528 ret = create_path(paths);
529 if (ret < 0 && ret != -EEXIST)
530 return ret;
531 tmp_id = tmp_id >> 4;
532 }
533
534 // Starting from here, we can split by creating multiple subfolders
535 const int left_bits = pg_num_bits - dump_num * 4;
536 // this variable denotes how many bits (for this level) that can be
537 // used for sub folder splitting
538 int split_bits = 4 - left_bits;
539 // the below logic is inspired by rados.h#ceph_stable_mod,
540 // it basically determines how many sub-folders should we
541 // create for splitting
542 assert(pg_num_bits > 0); // otherwise BAD_SHIFT
543 if (((1 << (pg_num_bits - 1)) | ps) >= pg_num) {
544 ++split_bits;
545 }
546 const uint32_t subs = (1 << split_bits);
547 // Calculate how many levels we create starting from here
548 int level = 0;
549 leavies /= subs;
550 while (leavies > 1) {
551 ++level;
552 leavies = leavies >> 4;
553 }
554 for (uint32_t i = 0; i < subs; ++i) {
555 assert(split_bits <= 4); // otherwise BAD_SHIFT
556 int v = tmp_id | (i << ((4 - split_bits) % 4));
557 paths.push_back(to_hex(v));
558 ret = create_path(paths);
559 if (ret < 0 && ret != -EEXIST)
560 return ret;
561 ret = recursive_create_path(paths, level);
562 if (ret < 0)
563 return ret;
564 paths.pop_back();
565 }
566 return 0;
567 }
568
569 int HashIndex::init_split_folder(vector<string> &path, uint32_t hash_level)
570 {
571 // Get the number of sub directories for the current path
572 vector<string> subdirs;
573 int ret = list_subdirs(path, &subdirs);
574 if (ret < 0)
575 return ret;
576 subdir_info_s info;
577 info.subdirs = subdirs.size();
578 info.hash_level = hash_level;
579 ret = set_info(path, info);
580 if (ret < 0)
581 return ret;
582 ret = fsync_dir(path);
583 if (ret < 0)
584 return ret;
585
586 // Do the same for subdirs
587 vector<string>::const_iterator iter;
588 for (iter = subdirs.begin(); iter != subdirs.end(); ++iter) {
589 path.push_back(*iter);
590 ret = init_split_folder(path, hash_level + 1);
591 if (ret < 0)
592 return ret;
593 path.pop_back();
594 }
595 return 0;
596 }
597
598 int HashIndex::recursive_create_path(vector<string>& path, int level)
599 {
600 if (level == 0)
601 return 0;
602 for (int i = 0; i < 16; ++i) {
603 path.push_back(to_hex(i));
604 int ret = create_path(path);
605 if (ret < 0 && ret != -EEXIST)
606 return ret;
607 ret = recursive_create_path(path, level - 1);
608 if (ret < 0)
609 return ret;
610 path.pop_back();
611 }
612 return 0;
613 }
614
615 int HashIndex::recursive_remove(const vector<string> &path) {
616 return _recursive_remove(path, true);
617 }
618
619 int HashIndex::_recursive_remove(const vector<string> &path, bool top) {
620 vector<string> subdirs;
621 dout(20) << __func__ << " path=" << path << dendl;
622 int r = list_subdirs(path, &subdirs);
623 if (r < 0)
624 return r;
625 map<string, ghobject_t> objects;
626 r = list_objects(path, 0, 0, &objects);
627 if (r < 0)
628 return r;
629 if (!objects.empty())
630 return -ENOTEMPTY;
631 vector<string> subdir(path);
632 for (vector<string>::iterator i = subdirs.begin();
633 i != subdirs.end();
634 ++i) {
635 subdir.push_back(*i);
636 r = _recursive_remove(subdir, false);
637 if (r < 0)
638 return r;
639 subdir.pop_back();
640 }
641 if (top)
642 return 0;
643 else
644 return remove_path(path);
645 }
646
647 int HashIndex::start_col_split(const vector<string> &path) {
648 bufferlist bl;
649 InProgressOp op_tag(InProgressOp::COL_SPLIT, path);
650 op_tag.encode(bl);
651 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
652 if (r < 0)
653 return r;
654 return fsync_dir(vector<string>());
655 }
656
657 int HashIndex::start_split(const vector<string> &path) {
658 bufferlist bl;
659 InProgressOp op_tag(InProgressOp::SPLIT, path);
660 op_tag.encode(bl);
661 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
662 if (r < 0)
663 return r;
664 return fsync_dir(vector<string>());
665 }
666
667 int HashIndex::start_merge(const vector<string> &path) {
668 bufferlist bl;
669 InProgressOp op_tag(InProgressOp::MERGE, path);
670 op_tag.encode(bl);
671 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
672 if (r < 0)
673 return r;
674 return fsync_dir(vector<string>());
675 }
676
677 int HashIndex::end_split_or_merge(const vector<string> &path) {
678 return remove_attr_path(vector<string>(), IN_PROGRESS_OP_TAG);
679 }
680
681 int HashIndex::get_info(const vector<string> &path, subdir_info_s *info) {
682 bufferlist buf;
683 int r = get_attr_path(path, SUBDIR_ATTR, buf);
684 if (r < 0)
685 return r;
686 bufferlist::iterator bufiter = buf.begin();
687 info->decode(bufiter);
688 assert(path.size() == (unsigned)info->hash_level);
689 return 0;
690 }
691
692 int HashIndex::set_info(const vector<string> &path, const subdir_info_s &info) {
693 bufferlist buf;
694 assert(path.size() == (unsigned)info.hash_level);
695 info.encode(buf);
696 return add_attr_path(path, SUBDIR_ATTR, buf);
697 }
698
699 bool HashIndex::must_merge(const subdir_info_s &info) {
700 return (info.hash_level > 0 &&
701 merge_threshold > 0 &&
702 info.objs < (unsigned)merge_threshold &&
703 info.subdirs == 0);
704 }
705
706 bool HashIndex::must_split(const subdir_info_s &info) {
707 return (info.hash_level < (unsigned)MAX_HASH_LEVEL &&
708 info.objs > ((unsigned)(abs(merge_threshold)) * 16 * split_multiplier));
709
710 }
711
712 int HashIndex::initiate_merge(const vector<string> &path, subdir_info_s info) {
713 return start_merge(path);
714 }
715
716 int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) {
717 vector<string> dst = path;
718 dst.pop_back();
719 subdir_info_s dstinfo;
720 int r, exists;
721 r = path_exists(path, &exists);
722 if (r < 0)
723 return r;
724 r = get_info(dst, &dstinfo);
725 if (r < 0)
726 return r;
727 if (exists) {
728 r = move_objects(path, dst);
729 if (r < 0)
730 return r;
731 r = reset_attr(dst);
732 if (r < 0)
733 return r;
734 r = remove_path(path);
735 if (r < 0)
736 return r;
737 }
738 if (must_merge(dstinfo)) {
739 r = initiate_merge(dst, dstinfo);
740 if (r < 0)
741 return r;
742 r = fsync_dir(dst);
743 if (r < 0)
744 return r;
745 return complete_merge(dst, dstinfo);
746 }
747 r = fsync_dir(dst);
748 if (r < 0)
749 return r;
750 return end_split_or_merge(path);
751 }
752
753 int HashIndex::initiate_split(const vector<string> &path, subdir_info_s info) {
754 return start_split(path);
755 }
756
757 int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) {
758 int level = info.hash_level;
759 map<string, ghobject_t> objects;
760 vector<string> dst = path;
761 int r;
762 dst.push_back("");
763 r = list_objects(path, 0, 0, &objects);
764 if (r < 0)
765 return r;
766 vector<string> subdirs_vec;
767 r = list_subdirs(path, &subdirs_vec);
768 if (r < 0)
769 return r;
770 set<string> subdirs;
771 subdirs.insert(subdirs_vec.begin(), subdirs_vec.end());
772 map<string, map<string, ghobject_t> > mapped;
773 map<string, ghobject_t> moved;
774 int num_moved = 0;
775 for (map<string, ghobject_t>::iterator i = objects.begin();
776 i != objects.end();
777 ++i) {
778 vector<string> new_path;
779 get_path_components(i->second, &new_path);
780 mapped[new_path[level]][i->first] = i->second;
781 }
782 for (map<string, map<string, ghobject_t> >::iterator i = mapped.begin();
783 i != mapped.end();
784 ) {
785 dst[level] = i->first;
786 /* If the info already exists, it must be correct,
787 * we may be picking up a partially finished split */
788 subdir_info_s temp;
789 // subdir has already been fully copied
790 if (subdirs.count(i->first) && !get_info(dst, &temp)) {
791 for (map<string, ghobject_t>::iterator j = i->second.begin();
792 j != i->second.end();
793 ++j) {
794 moved[j->first] = j->second;
795 num_moved++;
796 objects.erase(j->first);
797 }
798 ++i;
799 continue;
800 }
801
802 subdir_info_s info_new;
803 info_new.objs = i->second.size();
804 info_new.subdirs = 0;
805 info_new.hash_level = level + 1;
806 if (must_merge(info_new) && !subdirs.count(i->first)) {
807 mapped.erase(i++);
808 continue;
809 }
810
811 // Subdir doesn't yet exist
812 if (!subdirs.count(i->first)) {
813 info.subdirs += 1;
814 r = create_path(dst);
815 if (r < 0)
816 return r;
817 } // else subdir has been created but only partially copied
818
819 for (map<string, ghobject_t>::iterator j = i->second.begin();
820 j != i->second.end();
821 ++j) {
822 moved[j->first] = j->second;
823 num_moved++;
824 objects.erase(j->first);
825 r = link_object(path, dst, j->second, j->first);
826 // May be a partially finished split
827 if (r < 0 && r != -EEXIST) {
828 return r;
829 }
830 }
831
832 r = fsync_dir(dst);
833 if (r < 0)
834 return r;
835
836 // Presence of info must imply that all objects have been copied
837 r = set_info(dst, info_new);
838 if (r < 0)
839 return r;
840
841 r = fsync_dir(dst);
842 if (r < 0)
843 return r;
844
845 ++i;
846 }
847 r = remove_objects(path, moved, &objects);
848 if (r < 0)
849 return r;
850 info.objs = objects.size();
851 r = reset_attr(path);
852 if (r < 0)
853 return r;
854 r = fsync_dir(path);
855 if (r < 0)
856 return r;
857 return end_split_or_merge(path);
858 }
859
860 void HashIndex::get_path_components(const ghobject_t &oid,
861 vector<string> *path) {
862 char buf[MAX_HASH_LEVEL + 1];
863 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)oid.hobj.get_nibblewise_key());
864
865 // Path components are the hex characters of oid.hobj.hash, least
866 // significant first
867 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
868 path->push_back(string(&buf[i], 1));
869 }
870 }
871
872 string HashIndex::get_hash_str(uint32_t hash) {
873 char buf[MAX_HASH_LEVEL + 1];
874 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hash);
875 string retval;
876 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
877 retval.push_back(buf[MAX_HASH_LEVEL - 1 - i]);
878 }
879 return retval;
880 }
881
882 string HashIndex::get_path_str(const ghobject_t &oid) {
883 assert(!oid.is_max());
884 return get_hash_str(oid.hobj.get_hash());
885 }
886
887 uint32_t HashIndex::hash_prefix_to_hash(string prefix) {
888 while (prefix.size() < sizeof(uint32_t) * 2) {
889 prefix.push_back('0');
890 }
891 uint32_t hash;
892 sscanf(prefix.c_str(), "%x", &hash);
893 // nibble reverse
894 hash = ((hash & 0x0f0f0f0f) << 4) | ((hash & 0xf0f0f0f0) >> 4);
895 hash = ((hash & 0x00ff00ff) << 8) | ((hash & 0xff00ff00) >> 8);
896 hash = ((hash & 0x0000ffff) << 16) | ((hash & 0xffff0000) >> 16);
897 return hash;
898 }
899
900 int HashIndex::get_path_contents_by_hash_bitwise(
901 const vector<string> &path,
902 const ghobject_t *next_object,
903 set<string, CmpHexdigitStringBitwise> *hash_prefixes,
904 set<pair<string, ghobject_t>, CmpPairBitwise> *objects)
905 {
906 map<string, ghobject_t> rev_objects;
907 int r;
908 r = list_objects(path, 0, 0, &rev_objects);
909 if (r < 0)
910 return r;
911 // bitwise sort
912 for (map<string, ghobject_t>::iterator i = rev_objects.begin();
913 i != rev_objects.end();
914 ++i) {
915 if (next_object && i->second < *next_object)
916 continue;
917 string hash_prefix = get_path_str(i->second);
918 hash_prefixes->insert(hash_prefix);
919 objects->insert(pair<string, ghobject_t>(hash_prefix, i->second));
920 }
921 vector<string> subdirs;
922 r = list_subdirs(path, &subdirs);
923 if (r < 0)
924 return r;
925
926 // sort subdirs bitwise (by reversing hex digit nibbles)
927 std::sort(subdirs.begin(), subdirs.end(), cmp_hexdigit_bitwise);
928
929 // Local to this function, we will convert the prefix strings
930 // (previously simply the reversed hex digits) to also have each
931 // digit's nibbles reversed. This will make the strings sort
932 // bitwise.
933 string cur_prefix;
934 for (vector<string>::const_iterator i = path.begin();
935 i != path.end();
936 ++i) {
937 cur_prefix.append(reverse_hexdigit_bits_string(*i));
938 }
939 string next_object_string;
940 if (next_object)
941 next_object_string = reverse_hexdigit_bits_string(get_path_str(*next_object));
942 for (vector<string>::iterator i = subdirs.begin();
943 i != subdirs.end();
944 ++i) {
945 string candidate = cur_prefix + reverse_hexdigit_bits_string(*i);
946 if (next_object) {
947 if (next_object->is_max())
948 continue;
949 if (candidate < next_object_string.substr(0, candidate.size()))
950 continue;
951 }
952 // re-reverse the hex digit nibbles for the caller
953 hash_prefixes->insert(reverse_hexdigit_bits_string(candidate));
954 }
955 return 0;
956 }
957
958 int HashIndex::list_by_hash(const vector<string> &path,
959 const ghobject_t &end,
960 int max_count,
961 ghobject_t *next,
962 vector<ghobject_t> *out)
963 {
964 assert(out);
965 return list_by_hash_bitwise(path, end, max_count, next, out);
966 }
967
968 int HashIndex::list_by_hash_bitwise(
969 const vector<string> &path,
970 const ghobject_t& end,
971 int max_count,
972 ghobject_t *next,
973 vector<ghobject_t> *out)
974 {
975 vector<string> next_path = path;
976 next_path.push_back("");
977 set<string, CmpHexdigitStringBitwise> hash_prefixes;
978 set<pair<string, ghobject_t>, CmpPairBitwise> objects;
979 int r = get_path_contents_by_hash_bitwise(path,
980 next,
981 &hash_prefixes,
982 &objects);
983 if (r < 0)
984 return r;
985 for (set<string, CmpHexdigitStringBitwise>::iterator i = hash_prefixes.begin();
986 i != hash_prefixes.end();
987 ++i) {
988 dout(20) << __func__ << " prefix " << *i << dendl;
989 set<pair<string, ghobject_t>, CmpPairBitwise>::iterator j = objects.lower_bound(
990 make_pair(*i, ghobject_t()));
991 if (j == objects.end() || j->first != *i) {
992 *(next_path.rbegin()) = *(i->rbegin());
993 ghobject_t next_recurse;
994 if (next)
995 next_recurse = *next;
996 r = list_by_hash_bitwise(next_path,
997 end,
998 max_count,
999 &next_recurse,
1000 out);
1001
1002 if (r < 0)
1003 return r;
1004 if (!next_recurse.is_max()) {
1005 if (next)
1006 *next = next_recurse;
1007 return 0;
1008 }
1009 } else {
1010 while (j != objects.end() && j->first == *i) {
1011 if (max_count > 0 && out->size() == (unsigned)max_count) {
1012 if (next)
1013 *next = j->second;
1014 return 0;
1015 }
1016 if (j->second >= end) {
1017 if (next)
1018 *next = j->second;
1019 return 0;
1020 }
1021 if (!next || j->second >= *next) {
1022 dout(20) << __func__ << " prefix " << *i << " ob " << j->second << dendl;
1023 out->push_back(j->second);
1024 }
1025 ++j;
1026 }
1027 }
1028 }
1029 if (next)
1030 *next = ghobject_t::get_max();
1031 return 0;
1032 }
1033
1034