]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/filestore/HashIndex.cc
update sources to 12.2.8
[ceph.git] / ceph / src / os / filestore / HashIndex.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
c07f9fc5 15#include "include/compat.h"
7c673cae
FG
16#include "include/types.h"
17#include "include/buffer.h"
18#include "osd/osd_types.h"
19#include <errno.h>
20
21#include "HashIndex.h"
22
23#include "common/errno.h"
24#include "common/debug.h"
25#define dout_context cct
26#define dout_subsys ceph_subsys_filestore
27
28const string HashIndex::SUBDIR_ATTR = "contents";
224ce89b 29const string HashIndex::SETTINGS_ATTR = "settings";
7c673cae
FG
30const string HashIndex::IN_PROGRESS_OP_TAG = "in_progress_op";
31
32/// hex digit to integer value
33int hex_to_int(char c)
34{
35 if (c >= '0' && c <= '9')
36 return c - '0';
37 if (c >= 'A' && c <= 'F')
38 return c - 'A' + 10;
39 ceph_abort();
40}
41
42/// int value to hex digit
43char int_to_hex(int v)
44{
45 assert(v < 16);
46 if (v < 10)
47 return '0' + v;
48 return 'A' + v - 10;
49}
50
51/// reverse bits in a nibble (0..15)
52int reverse_nibble_bits(int in)
53{
54 assert(in < 16);
55 return
56 ((in & 8) >> 3) |
57 ((in & 4) >> 1) |
58 ((in & 2) << 1) |
59 ((in & 1) << 3);
60}
61
62/// reverse nibble bits in a hex digit
63char reverse_hexdigit_bits(char c)
64{
65 return int_to_hex(reverse_nibble_bits(hex_to_int(c)));
66}
67
68/// reverse nibble bits in a hex string
69string reverse_hexdigit_bits_string(string s)
70{
71 for (unsigned i=0; i<s.size(); ++i)
72 s[i] = reverse_hexdigit_bits(s[i]);
73 return s;
74}
75
76/// compare hex digit (as length 1 string) bitwise
77bool cmp_hexdigit_bitwise(const string& l, const string& r)
78{
79 assert(l.length() == 1 && r.length() == 1);
80 int lv = hex_to_int(l[0]);
81 int rv = hex_to_int(r[0]);
82 assert(lv < 16);
83 assert(rv < 16);
84 return reverse_nibble_bits(lv) < reverse_nibble_bits(rv);
85}
86
87/// compare hex digit string bitwise
88bool cmp_hexdigit_string_bitwise(const string& l, const string& r)
89{
90 string ll = reverse_hexdigit_bits_string(l);
91 string rr = reverse_hexdigit_bits_string(r);
92 return ll < rr;
93}
94
95int HashIndex::cleanup() {
96 bufferlist bl;
97 int r = get_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
98 if (r < 0) {
99 // No in progress operations!
100 return 0;
101 }
102 bufferlist::iterator i = bl.begin();
103 InProgressOp in_progress(i);
104 subdir_info_s info;
105 r = get_info(in_progress.path, &info);
106 if (r == -ENOENT) {
107 return end_split_or_merge(in_progress.path);
108 } else if (r < 0) {
109 return r;
110 }
111
112 if (in_progress.is_split())
113 return complete_split(in_progress.path, info);
114 else if (in_progress.is_merge())
115 return complete_merge(in_progress.path, info);
116 else if (in_progress.is_col_split()) {
117 for (vector<string>::iterator i = in_progress.path.begin();
118 i != in_progress.path.end();
119 ++i) {
120 vector<string> path(in_progress.path.begin(), i);
121 int r = reset_attr(path);
122 if (r < 0)
123 return r;
124 }
125 return 0;
126 }
127 else
128 return -EINVAL;
129}
130
131int HashIndex::reset_attr(
132 const vector<string> &path)
133{
134 int exists = 0;
135 int r = path_exists(path, &exists);
136 if (r < 0)
137 return r;
138 if (!exists)
139 return 0;
140 map<string, ghobject_t> objects;
141 vector<string> subdirs;
142 r = list_objects(path, 0, 0, &objects);
143 if (r < 0)
144 return r;
145 r = list_subdirs(path, &subdirs);
146 if (r < 0)
147 return r;
148
149 subdir_info_s info;
150 info.hash_level = path.size();
151 info.objs = objects.size();
152 info.subdirs = subdirs.size();
153 return set_info(path, info);
154}
155
156int HashIndex::col_split_level(
157 HashIndex &from,
158 HashIndex &to,
159 const vector<string> &path,
160 uint32_t inbits,
161 uint32_t match,
162 unsigned *mkdirred)
163{
164 /* For each subdir, move, recurse, or ignore based on comparing the low order
165 * bits of the hash represented by the subdir path with inbits, match passed
166 * in.
167 */
168 vector<string> subdirs;
169 int r = from.list_subdirs(path, &subdirs);
170 if (r < 0)
171 return r;
172 map<string, ghobject_t> objects;
173 r = from.list_objects(path, 0, 0, &objects);
174 if (r < 0)
175 return r;
176
177 set<string> to_move;
178 for (vector<string>::iterator i = subdirs.begin();
179 i != subdirs.end();
180 ++i) {
181 uint32_t bits = 0;
182 uint32_t hash = 0;
183 vector<string> sub_path(path.begin(), path.end());
184 sub_path.push_back(*i);
185 path_to_hobject_hash_prefix(sub_path, &bits, &hash);
186 if (bits < inbits) {
187 if (hobject_t::match_hash(hash, bits, match)) {
188 r = col_split_level(
189 from,
190 to,
191 sub_path,
192 inbits,
193 match,
194 mkdirred);
195 if (r < 0)
196 return r;
197 if (*mkdirred > path.size())
198 *mkdirred = path.size();
199 } // else, skip, doesn't need to be moved or recursed into
200 } else {
201 if (hobject_t::match_hash(hash, inbits, match)) {
202 to_move.insert(*i);
203 }
204 } // else, skip, doesn't need to be moved or recursed into
205 }
206
207 /* Then, do the same for each object */
208 map<string, ghobject_t> objs_to_move;
209 for (map<string, ghobject_t>::iterator i = objects.begin();
210 i != objects.end();
211 ++i) {
212 if (i->second.match(inbits, match)) {
213 objs_to_move.insert(*i);
214 }
215 }
216
217 if (objs_to_move.empty() && to_move.empty())
218 return 0;
219
220 // Make parent directories as needed
221 while (*mkdirred < path.size()) {
222 ++*mkdirred;
223 int exists = 0;
224 vector<string> creating_path(path.begin(), path.begin()+*mkdirred);
225 r = to.path_exists(creating_path, &exists);
226 if (r < 0)
227 return r;
228 if (exists)
229 continue;
230 subdir_info_s info;
231 info.objs = 0;
232 info.subdirs = 0;
233 info.hash_level = creating_path.size();
234 if (*mkdirred < path.size() - 1)
235 info.subdirs = 1;
236 r = to.start_col_split(creating_path);
237 if (r < 0)
238 return r;
239 r = to.create_path(creating_path);
240 if (r < 0)
241 return r;
242 r = to.set_info(creating_path, info);
243 if (r < 0)
244 return r;
245 r = to.end_split_or_merge(creating_path);
246 if (r < 0)
247 return r;
248 }
249
250 subdir_info_s from_info;
251 subdir_info_s to_info;
252 r = from.get_info(path, &from_info);
253 if (r < 0)
254 return r;
255 r = to.get_info(path, &to_info);
256 if (r < 0)
257 return r;
258
259 from.start_col_split(path);
260 to.start_col_split(path);
261
262 // Do subdir moves
263 for (set<string>::iterator i = to_move.begin();
264 i != to_move.end();
265 ++i) {
266 from_info.subdirs--;
267 to_info.subdirs++;
268 r = move_subdir(from, to, path, *i);
269 if (r < 0)
270 return r;
271 }
272
273 for (map<string, ghobject_t>::iterator i = objs_to_move.begin();
274 i != objs_to_move.end();
275 ++i) {
276 from_info.objs--;
277 to_info.objs++;
278 r = move_object(from, to, path, *i);
279 if (r < 0)
280 return r;
281 }
282
283
284 r = to.set_info(path, to_info);
285 if (r < 0)
286 return r;
287 r = from.set_info(path, from_info);
288 if (r < 0)
289 return r;
290 from.end_split_or_merge(path);
291 to.end_split_or_merge(path);
292 return 0;
293}
294
295int HashIndex::_split(
296 uint32_t match,
297 uint32_t bits,
298 CollectionIndex* dest) {
299 assert(collection_version() == dest->collection_version());
300 unsigned mkdirred = 0;
301 return col_split_level(
302 *this,
303 *static_cast<HashIndex*>(dest),
304 vector<string>(),
305 bits,
306 match,
307 &mkdirred);
308}
309
1adf2230
AA
310int HashIndex::split_dirs(const vector<string> &path, int target_level) {
311 dout(20) << __func__ << " " << path << " target level: "
312 << target_level << dendl;
7c673cae
FG
313 subdir_info_s info;
314 int r = get_info(path, &info);
315 if (r < 0) {
316 dout(10) << "error looking up info for " << path << ": "
317 << cpp_strerror(r) << dendl;
318 return r;
319 }
320
1adf2230 321 if (must_split(info, target_level)) {
7c673cae 322 dout(1) << __func__ << " " << path << " has " << info.objs
1adf2230
AA
323 << " objects, " << info.hash_level
324 << " level, starting split in pg " << coll() << "." << dendl;
7c673cae
FG
325 r = initiate_split(path, info);
326 if (r < 0) {
327 dout(10) << "error initiating split on " << path << ": "
328 << cpp_strerror(r) << dendl;
329 return r;
330 }
331
332 r = complete_split(path, info);
1adf2230 333 dout(1) << __func__ << " " << path << " split completed in pg " << coll() << "."
7c673cae
FG
334 << dendl;
335 if (r < 0) {
336 dout(10) << "error completing split on " << path << ": "
337 << cpp_strerror(r) << dendl;
338 return r;
339 }
340 }
341
342 vector<string> subdirs;
343 r = list_subdirs(path, &subdirs);
344 if (r < 0) {
345 dout(10) << "error listing subdirs of " << path << ": "
346 << cpp_strerror(r) << dendl;
347 return r;
348 }
349 for (vector<string>::const_iterator it = subdirs.begin();
350 it != subdirs.end(); ++it) {
351 vector<string> subdir_path(path);
352 subdir_path.push_back(*it);
1adf2230 353 r = split_dirs(subdir_path, target_level);
7c673cae
FG
354 if (r < 0) {
355 return r;
356 }
357 }
358
359 return r;
360}
361
1adf2230 362int HashIndex::apply_layout_settings(int target_level) {
7c673cae
FG
363 vector<string> path;
364 dout(10) << __func__ << " split multiple = " << split_multiplier
224ce89b
WB
365 << " merge threshold = " << merge_threshold
366 << " split rand factor = " << cct->_conf->filestore_split_rand_factor
1adf2230 367 << " target level = " << target_level
224ce89b
WB
368 << dendl;
369 int r = write_settings();
370 if (r < 0)
371 return r;
1adf2230 372 return split_dirs(path, target_level);
7c673cae
FG
373}
374
375int HashIndex::_init() {
376 subdir_info_s info;
377 vector<string> path;
224ce89b
WB
378 int r = set_info(path, info);
379 if (r < 0)
380 return r;
381 return write_settings();
382}
383
384int HashIndex::write_settings() {
385 if (cct->_conf->filestore_split_rand_factor > 0) {
386 settings.split_rand_factor = rand() % cct->_conf->filestore_split_rand_factor;
387 } else {
388 settings.split_rand_factor = 0;
389 }
390 vector<string> path;
391 bufferlist bl;
392 settings.encode(bl);
393 return add_attr_path(path, SETTINGS_ATTR, bl);
394}
395
396int HashIndex::read_settings() {
397 vector<string> path;
398 bufferlist bl;
399 int r = get_attr_path(path, SETTINGS_ATTR, bl);
400 if (r == -ENODATA)
401 return 0;
402 if (r < 0) {
403 derr << __func__ << " error reading settings: " << cpp_strerror(r) << dendl;
404 return r;
405 }
406 bufferlist::iterator it = bl.begin();
407 settings.decode(it);
408 dout(20) << __func__ << " split_rand_factor = " << settings.split_rand_factor << dendl;
409 return 0;
7c673cae
FG
410}
411
412/* LFNIndex virtual method implementations */
413int HashIndex::_created(const vector<string> &path,
414 const ghobject_t &oid,
415 const string &mangled_name) {
416 subdir_info_s info;
417 int r;
418 r = get_info(path, &info);
419 if (r < 0)
420 return r;
421 info.objs++;
422 r = set_info(path, info);
423 if (r < 0)
424 return r;
425
426 if (must_split(info)) {
427 dout(1) << __func__ << " " << path << " has " << info.objs
1adf2230 428 << " objects, starting split in pg " << coll() << "." << dendl;
7c673cae
FG
429 int r = initiate_split(path, info);
430 if (r < 0)
431 return r;
432 r = complete_split(path, info);
1adf2230 433 dout(1) << __func__ << " " << path << " split completed in pg " << coll() << "."
7c673cae
FG
434 << dendl;
435 return r;
436 } else {
437 return 0;
438 }
439}
440
441int HashIndex::_remove(const vector<string> &path,
442 const ghobject_t &oid,
443 const string &mangled_name) {
444 int r;
445 r = remove_object(path, oid);
446 if (r < 0)
447 return r;
448 subdir_info_s info;
449 r = get_info(path, &info);
450 if (r < 0)
451 return r;
452 info.objs--;
453 r = set_info(path, info);
454 if (r < 0)
455 return r;
456 if (must_merge(info)) {
457 r = initiate_merge(path, info);
458 if (r < 0)
459 return r;
460 return complete_merge(path, info);
461 } else {
462 return 0;
463 }
464}
465
466int HashIndex::_lookup(const ghobject_t &oid,
467 vector<string> *path,
468 string *mangled_name,
469 int *hardlink) {
470 vector<string> path_comp;
471 get_path_components(oid, &path_comp);
472 vector<string>::iterator next = path_comp.begin();
473 int exists;
474 while (1) {
475 int r = path_exists(*path, &exists);
476 if (r < 0)
477 return r;
478 if (!exists) {
479 if (path->empty())
480 return -ENOENT;
481 path->pop_back();
482 break;
483 }
484 if (next == path_comp.end())
485 break;
486 path->push_back(*(next++));
487 }
488 return get_mangled_name(*path, oid, mangled_name, hardlink);
489}
490
491int HashIndex::_collection_list_partial(const ghobject_t &start,
492 const ghobject_t &end,
493 int max_count,
494 vector<ghobject_t> *ls,
495 ghobject_t *next) {
496 vector<string> path;
497 ghobject_t _next;
498 if (!next)
499 next = &_next;
500 *next = start;
501 dout(20) << __func__ << " start:" << start << " end:" << end << "-" << max_count << " ls.size " << ls->size() << dendl;
502 return list_by_hash(path, end, max_count, next, ls);
503}
504
505int HashIndex::prep_delete() {
506 return recursive_remove(vector<string>());
507}
508
509int HashIndex::_pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs) {
510 int ret;
511 vector<string> path;
512 subdir_info_s root_info;
513 // Make sure there is neither objects nor sub-folders
514 // in this collection
515 ret = get_info(path, &root_info);
516 if (ret < 0)
517 return ret;
518
519 // Do the folder splitting first
520 ret = pre_split_folder(pg_num, expected_num_objs);
521 if (ret < 0)
522 return ret;
523 // Initialize the folder info starting from root
524 return init_split_folder(path, 0);
525}
526
527int HashIndex::pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs)
528{
529 // If folder merging is enabled (by setting the threshold positive),
530 // no need to split
531 if (merge_threshold > 0)
532 return 0;
533 const coll_t c = coll();
534 // Do not split if the expected number of objects in this collection is zero (by default)
535 if (expected_num_objs == 0)
536 return 0;
537
538 // Calculate the number of leaf folders (which actually store files)
539 // need to be created
224ce89b 540 const uint64_t objs_per_folder = ((uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier + settings.split_rand_factor) * 16;
7c673cae
FG
541 uint64_t leavies = expected_num_objs / objs_per_folder ;
542 // No need to split
543 if (leavies == 0 || expected_num_objs == objs_per_folder)
544 return 0;
545
546 spg_t spgid;
547 if (!c.is_pg_prefix(&spgid))
548 return -EINVAL;
549 const ps_t ps = spgid.pgid.ps();
550
551 // the most significant bits of pg_num
552 const int pg_num_bits = calc_num_bits(pg_num - 1);
553 ps_t tmp_id = ps;
554 // calculate the number of levels we only create one sub folder
555 int num = pg_num_bits / 4;
556 // pg num's hex value is like 1xxx,xxxx,xxxx but not 1111,1111,1111,
557 // so that splitting starts at level 3
558 if (pg_num_bits % 4 == 0 && pg_num < ((uint32_t)1 << pg_num_bits)) {
559 --num;
560 }
561
562 int ret;
563 // Start with creation that only has one subfolder
564 vector<string> paths;
565 int dump_num = num;
566 while (num-- > 0) {
567 ps_t v = tmp_id & 0x0000000f;
568 paths.push_back(to_hex(v));
569 ret = create_path(paths);
570 if (ret < 0 && ret != -EEXIST)
571 return ret;
572 tmp_id = tmp_id >> 4;
573 }
574
575 // Starting from here, we can split by creating multiple subfolders
576 const int left_bits = pg_num_bits - dump_num * 4;
577 // this variable denotes how many bits (for this level) that can be
578 // used for sub folder splitting
579 int split_bits = 4 - left_bits;
580 // the below logic is inspired by rados.h#ceph_stable_mod,
581 // it basically determines how many sub-folders should we
582 // create for splitting
583 assert(pg_num_bits > 0); // otherwise BAD_SHIFT
584 if (((1 << (pg_num_bits - 1)) | ps) >= pg_num) {
585 ++split_bits;
586 }
587 const uint32_t subs = (1 << split_bits);
588 // Calculate how many levels we create starting from here
589 int level = 0;
590 leavies /= subs;
591 while (leavies > 1) {
592 ++level;
593 leavies = leavies >> 4;
594 }
595 for (uint32_t i = 0; i < subs; ++i) {
596 assert(split_bits <= 4); // otherwise BAD_SHIFT
597 int v = tmp_id | (i << ((4 - split_bits) % 4));
598 paths.push_back(to_hex(v));
599 ret = create_path(paths);
600 if (ret < 0 && ret != -EEXIST)
601 return ret;
602 ret = recursive_create_path(paths, level);
603 if (ret < 0)
604 return ret;
605 paths.pop_back();
606 }
607 return 0;
608}
609
610int HashIndex::init_split_folder(vector<string> &path, uint32_t hash_level)
611{
612 // Get the number of sub directories for the current path
613 vector<string> subdirs;
614 int ret = list_subdirs(path, &subdirs);
615 if (ret < 0)
616 return ret;
617 subdir_info_s info;
618 info.subdirs = subdirs.size();
619 info.hash_level = hash_level;
620 ret = set_info(path, info);
621 if (ret < 0)
622 return ret;
623 ret = fsync_dir(path);
624 if (ret < 0)
625 return ret;
626
627 // Do the same for subdirs
628 vector<string>::const_iterator iter;
629 for (iter = subdirs.begin(); iter != subdirs.end(); ++iter) {
630 path.push_back(*iter);
631 ret = init_split_folder(path, hash_level + 1);
632 if (ret < 0)
633 return ret;
634 path.pop_back();
635 }
636 return 0;
637}
638
639int HashIndex::recursive_create_path(vector<string>& path, int level)
640{
641 if (level == 0)
642 return 0;
643 for (int i = 0; i < 16; ++i) {
644 path.push_back(to_hex(i));
645 int ret = create_path(path);
646 if (ret < 0 && ret != -EEXIST)
647 return ret;
648 ret = recursive_create_path(path, level - 1);
649 if (ret < 0)
650 return ret;
651 path.pop_back();
652 }
653 return 0;
654}
655
656int HashIndex::recursive_remove(const vector<string> &path) {
657 return _recursive_remove(path, true);
658}
659
660int HashIndex::_recursive_remove(const vector<string> &path, bool top) {
661 vector<string> subdirs;
662 dout(20) << __func__ << " path=" << path << dendl;
663 int r = list_subdirs(path, &subdirs);
664 if (r < 0)
665 return r;
666 map<string, ghobject_t> objects;
667 r = list_objects(path, 0, 0, &objects);
668 if (r < 0)
669 return r;
670 if (!objects.empty())
671 return -ENOTEMPTY;
672 vector<string> subdir(path);
673 for (vector<string>::iterator i = subdirs.begin();
674 i != subdirs.end();
675 ++i) {
676 subdir.push_back(*i);
677 r = _recursive_remove(subdir, false);
678 if (r < 0)
679 return r;
680 subdir.pop_back();
681 }
682 if (top)
683 return 0;
684 else
685 return remove_path(path);
686}
687
688int HashIndex::start_col_split(const vector<string> &path) {
689 bufferlist bl;
690 InProgressOp op_tag(InProgressOp::COL_SPLIT, path);
691 op_tag.encode(bl);
692 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
693 if (r < 0)
694 return r;
695 return fsync_dir(vector<string>());
696}
697
698int HashIndex::start_split(const vector<string> &path) {
699 bufferlist bl;
700 InProgressOp op_tag(InProgressOp::SPLIT, path);
701 op_tag.encode(bl);
702 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
703 if (r < 0)
704 return r;
705 return fsync_dir(vector<string>());
706}
707
708int HashIndex::start_merge(const vector<string> &path) {
709 bufferlist bl;
710 InProgressOp op_tag(InProgressOp::MERGE, path);
711 op_tag.encode(bl);
712 int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl);
713 if (r < 0)
714 return r;
715 return fsync_dir(vector<string>());
716}
717
718int HashIndex::end_split_or_merge(const vector<string> &path) {
719 return remove_attr_path(vector<string>(), IN_PROGRESS_OP_TAG);
720}
721
722int HashIndex::get_info(const vector<string> &path, subdir_info_s *info) {
723 bufferlist buf;
724 int r = get_attr_path(path, SUBDIR_ATTR, buf);
725 if (r < 0)
726 return r;
727 bufferlist::iterator bufiter = buf.begin();
728 info->decode(bufiter);
729 assert(path.size() == (unsigned)info->hash_level);
730 return 0;
731}
732
733int HashIndex::set_info(const vector<string> &path, const subdir_info_s &info) {
734 bufferlist buf;
735 assert(path.size() == (unsigned)info.hash_level);
736 info.encode(buf);
737 return add_attr_path(path, SUBDIR_ATTR, buf);
738}
739
740bool HashIndex::must_merge(const subdir_info_s &info) {
741 return (info.hash_level > 0 &&
742 merge_threshold > 0 &&
743 info.objs < (unsigned)merge_threshold &&
744 info.subdirs == 0);
745}
746
1adf2230
AA
747bool HashIndex::must_split(const subdir_info_s &info, int target_level) {
748 // target_level is used for ceph-objectstore-tool to split dirs offline.
749 // if it is set (defalult is 0) and current hash level < target_level,
750 // this dir would be split no matters how many objects it has.
7c673cae 751 return (info.hash_level < (unsigned)MAX_HASH_LEVEL &&
1adf2230
AA
752 ((target_level > 0 && info.hash_level < (unsigned)target_level) ||
753 (info.objs > ((unsigned)(abs(merge_threshold) * split_multiplier + settings.split_rand_factor) * 16))));
7c673cae
FG
754}
755
756int HashIndex::initiate_merge(const vector<string> &path, subdir_info_s info) {
757 return start_merge(path);
758}
759
760int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) {
761 vector<string> dst = path;
762 dst.pop_back();
763 subdir_info_s dstinfo;
764 int r, exists;
765 r = path_exists(path, &exists);
766 if (r < 0)
767 return r;
768 r = get_info(dst, &dstinfo);
769 if (r < 0)
770 return r;
771 if (exists) {
772 r = move_objects(path, dst);
773 if (r < 0)
774 return r;
775 r = reset_attr(dst);
776 if (r < 0)
777 return r;
778 r = remove_path(path);
779 if (r < 0)
780 return r;
781 }
782 if (must_merge(dstinfo)) {
783 r = initiate_merge(dst, dstinfo);
784 if (r < 0)
785 return r;
786 r = fsync_dir(dst);
787 if (r < 0)
788 return r;
789 return complete_merge(dst, dstinfo);
790 }
791 r = fsync_dir(dst);
792 if (r < 0)
793 return r;
794 return end_split_or_merge(path);
795}
796
797int HashIndex::initiate_split(const vector<string> &path, subdir_info_s info) {
798 return start_split(path);
799}
800
801int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) {
802 int level = info.hash_level;
803 map<string, ghobject_t> objects;
804 vector<string> dst = path;
805 int r;
806 dst.push_back("");
807 r = list_objects(path, 0, 0, &objects);
808 if (r < 0)
809 return r;
810 vector<string> subdirs_vec;
811 r = list_subdirs(path, &subdirs_vec);
812 if (r < 0)
813 return r;
814 set<string> subdirs;
815 subdirs.insert(subdirs_vec.begin(), subdirs_vec.end());
816 map<string, map<string, ghobject_t> > mapped;
817 map<string, ghobject_t> moved;
818 int num_moved = 0;
819 for (map<string, ghobject_t>::iterator i = objects.begin();
820 i != objects.end();
821 ++i) {
822 vector<string> new_path;
823 get_path_components(i->second, &new_path);
824 mapped[new_path[level]][i->first] = i->second;
825 }
826 for (map<string, map<string, ghobject_t> >::iterator i = mapped.begin();
827 i != mapped.end();
828 ) {
829 dst[level] = i->first;
830 /* If the info already exists, it must be correct,
831 * we may be picking up a partially finished split */
832 subdir_info_s temp;
833 // subdir has already been fully copied
834 if (subdirs.count(i->first) && !get_info(dst, &temp)) {
835 for (map<string, ghobject_t>::iterator j = i->second.begin();
836 j != i->second.end();
837 ++j) {
838 moved[j->first] = j->second;
839 num_moved++;
840 objects.erase(j->first);
841 }
842 ++i;
843 continue;
844 }
845
846 subdir_info_s info_new;
847 info_new.objs = i->second.size();
848 info_new.subdirs = 0;
849 info_new.hash_level = level + 1;
850 if (must_merge(info_new) && !subdirs.count(i->first)) {
851 mapped.erase(i++);
852 continue;
853 }
854
855 // Subdir doesn't yet exist
856 if (!subdirs.count(i->first)) {
857 info.subdirs += 1;
858 r = create_path(dst);
859 if (r < 0)
860 return r;
861 } // else subdir has been created but only partially copied
862
863 for (map<string, ghobject_t>::iterator j = i->second.begin();
864 j != i->second.end();
865 ++j) {
866 moved[j->first] = j->second;
867 num_moved++;
868 objects.erase(j->first);
869 r = link_object(path, dst, j->second, j->first);
870 // May be a partially finished split
871 if (r < 0 && r != -EEXIST) {
872 return r;
873 }
874 }
875
876 r = fsync_dir(dst);
877 if (r < 0)
878 return r;
879
880 // Presence of info must imply that all objects have been copied
881 r = set_info(dst, info_new);
882 if (r < 0)
883 return r;
884
885 r = fsync_dir(dst);
886 if (r < 0)
887 return r;
888
889 ++i;
890 }
891 r = remove_objects(path, moved, &objects);
892 if (r < 0)
893 return r;
894 info.objs = objects.size();
895 r = reset_attr(path);
896 if (r < 0)
897 return r;
898 r = fsync_dir(path);
899 if (r < 0)
900 return r;
901 return end_split_or_merge(path);
902}
903
904void HashIndex::get_path_components(const ghobject_t &oid,
905 vector<string> *path) {
906 char buf[MAX_HASH_LEVEL + 1];
907 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)oid.hobj.get_nibblewise_key());
908
909 // Path components are the hex characters of oid.hobj.hash, least
910 // significant first
911 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
912 path->push_back(string(&buf[i], 1));
913 }
914}
915
916string HashIndex::get_hash_str(uint32_t hash) {
917 char buf[MAX_HASH_LEVEL + 1];
918 snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, hash);
919 string retval;
920 for (int i = 0; i < MAX_HASH_LEVEL; ++i) {
921 retval.push_back(buf[MAX_HASH_LEVEL - 1 - i]);
922 }
923 return retval;
924}
925
926string HashIndex::get_path_str(const ghobject_t &oid) {
927 assert(!oid.is_max());
928 return get_hash_str(oid.hobj.get_hash());
929}
930
931uint32_t HashIndex::hash_prefix_to_hash(string prefix) {
932 while (prefix.size() < sizeof(uint32_t) * 2) {
933 prefix.push_back('0');
934 }
935 uint32_t hash;
936 sscanf(prefix.c_str(), "%x", &hash);
937 // nibble reverse
938 hash = ((hash & 0x0f0f0f0f) << 4) | ((hash & 0xf0f0f0f0) >> 4);
939 hash = ((hash & 0x00ff00ff) << 8) | ((hash & 0xff00ff00) >> 8);
940 hash = ((hash & 0x0000ffff) << 16) | ((hash & 0xffff0000) >> 16);
941 return hash;
942}
943
944int HashIndex::get_path_contents_by_hash_bitwise(
945 const vector<string> &path,
946 const ghobject_t *next_object,
947 set<string, CmpHexdigitStringBitwise> *hash_prefixes,
948 set<pair<string, ghobject_t>, CmpPairBitwise> *objects)
949{
950 map<string, ghobject_t> rev_objects;
951 int r;
952 r = list_objects(path, 0, 0, &rev_objects);
953 if (r < 0)
954 return r;
955 // bitwise sort
956 for (map<string, ghobject_t>::iterator i = rev_objects.begin();
957 i != rev_objects.end();
958 ++i) {
959 if (next_object && i->second < *next_object)
960 continue;
961 string hash_prefix = get_path_str(i->second);
962 hash_prefixes->insert(hash_prefix);
963 objects->insert(pair<string, ghobject_t>(hash_prefix, i->second));
964 }
965 vector<string> subdirs;
966 r = list_subdirs(path, &subdirs);
967 if (r < 0)
968 return r;
969
970 // sort subdirs bitwise (by reversing hex digit nibbles)
971 std::sort(subdirs.begin(), subdirs.end(), cmp_hexdigit_bitwise);
972
973 // Local to this function, we will convert the prefix strings
974 // (previously simply the reversed hex digits) to also have each
975 // digit's nibbles reversed. This will make the strings sort
976 // bitwise.
977 string cur_prefix;
978 for (vector<string>::const_iterator i = path.begin();
979 i != path.end();
980 ++i) {
981 cur_prefix.append(reverse_hexdigit_bits_string(*i));
982 }
983 string next_object_string;
984 if (next_object)
985 next_object_string = reverse_hexdigit_bits_string(get_path_str(*next_object));
986 for (vector<string>::iterator i = subdirs.begin();
987 i != subdirs.end();
988 ++i) {
989 string candidate = cur_prefix + reverse_hexdigit_bits_string(*i);
990 if (next_object) {
991 if (next_object->is_max())
992 continue;
993 if (candidate < next_object_string.substr(0, candidate.size()))
994 continue;
995 }
996 // re-reverse the hex digit nibbles for the caller
997 hash_prefixes->insert(reverse_hexdigit_bits_string(candidate));
998 }
999 return 0;
1000}
1001
1002int HashIndex::list_by_hash(const vector<string> &path,
1003 const ghobject_t &end,
1004 int max_count,
1005 ghobject_t *next,
1006 vector<ghobject_t> *out)
1007{
1008 assert(out);
1009 return list_by_hash_bitwise(path, end, max_count, next, out);
1010}
1011
1012int HashIndex::list_by_hash_bitwise(
1013 const vector<string> &path,
1014 const ghobject_t& end,
1015 int max_count,
1016 ghobject_t *next,
1017 vector<ghobject_t> *out)
1018{
1019 vector<string> next_path = path;
1020 next_path.push_back("");
1021 set<string, CmpHexdigitStringBitwise> hash_prefixes;
1022 set<pair<string, ghobject_t>, CmpPairBitwise> objects;
1023 int r = get_path_contents_by_hash_bitwise(path,
1024 next,
1025 &hash_prefixes,
1026 &objects);
1027 if (r < 0)
1028 return r;
1029 for (set<string, CmpHexdigitStringBitwise>::iterator i = hash_prefixes.begin();
1030 i != hash_prefixes.end();
1031 ++i) {
1032 dout(20) << __func__ << " prefix " << *i << dendl;
1033 set<pair<string, ghobject_t>, CmpPairBitwise>::iterator j = objects.lower_bound(
1034 make_pair(*i, ghobject_t()));
1035 if (j == objects.end() || j->first != *i) {
1036 *(next_path.rbegin()) = *(i->rbegin());
1037 ghobject_t next_recurse;
1038 if (next)
1039 next_recurse = *next;
1040 r = list_by_hash_bitwise(next_path,
1041 end,
1042 max_count,
1043 &next_recurse,
1044 out);
1045
1046 if (r < 0)
1047 return r;
1048 if (!next_recurse.is_max()) {
1049 if (next)
1050 *next = next_recurse;
1051 return 0;
1052 }
1053 } else {
1054 while (j != objects.end() && j->first == *i) {
1055 if (max_count > 0 && out->size() == (unsigned)max_count) {
1056 if (next)
1057 *next = j->second;
1058 return 0;
1059 }
1060 if (j->second >= end) {
1061 if (next)
1062 *next = j->second;
1063 return 0;
1064 }
1065 if (!next || j->second >= *next) {
1066 dout(20) << __func__ << " prefix " << *i << " ob " << j->second << dendl;
1067 out->push_back(j->second);
1068 }
1069 ++j;
1070 }
1071 }
1072 }
1073 if (next)
1074 *next = ghobject_t::get_max();
1075 return 0;
1076}
1077
1078