]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/filestore/LFNIndex.cc
import ceph pacific 16.2.5
[ceph.git] / ceph / src / os / filestore / LFNIndex.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <string>
16 #include <map>
17 #include <set>
18 #include <vector>
19 #include <errno.h>
20 #include <string.h>
21
22 #if defined(__FreeBSD__)
23 #include <sys/param.h>
24 #endif
25
26 #include "osd/osd_types.h"
27 #include "include/object.h"
28 #include "common/config.h"
29 #include "common/debug.h"
30 #include "include/buffer.h"
31 #include "common/ceph_crypto.h"
32 #include "common/errno.h"
33 #include "include/compat.h"
34 #include "chain_xattr.h"
35
36 #include "LFNIndex.h"
37
38 #define dout_context cct
39 #define dout_subsys ceph_subsys_filestore
40 #undef dout_prefix
41 #define dout_prefix *_dout << "LFNIndex(" << get_base_path() << ") "
42
43 using std::map;
44 using std::pair;
45 using std::set;
46 using std::string;
47 using std::vector;
48
49 using ceph::crypto::SHA1;
50
51 using ceph::bufferlist;
52 using ceph::bufferptr;
53
54 const string LFNIndex::LFN_ATTR = "user.cephos.lfn";
55 const string LFNIndex::PHASH_ATTR_PREFIX = "user.cephos.phash.";
56 const string LFNIndex::SUBDIR_PREFIX = "DIR_";
57 const string LFNIndex::FILENAME_COOKIE = "long";
58 const int LFNIndex::FILENAME_PREFIX_LEN = FILENAME_SHORT_LEN - FILENAME_HASH_LEN -
59 FILENAME_COOKIE.size() -
60 FILENAME_EXTRA;
61 void LFNIndex::maybe_inject_failure()
62 {
63 if (error_injection_enabled) {
64 if (current_failure > last_failure &&
65 (((double)(rand() % 10000))/((double)(10000))
66 < error_injection_probability)) {
67 last_failure = current_failure;
68 current_failure = 0;
69 throw RetryException();
70 }
71 ++current_failure;
72 }
73 }
74
75 // Helper to close fd's when we leave scope. This is useful when used
76 // in combination with RetryException, thrown by the above.
77 struct FDCloser {
78 int fd;
79 explicit FDCloser(int f) : fd(f) {}
80 ~FDCloser() {
81 VOID_TEMP_FAILURE_RETRY(::close(fd));
82 }
83 };
84
85
86 /* Public methods */
87
88 uint64_t LFNIndex::get_max_escaped_name_len(const hobject_t &obj)
89 {
90 ghobject_t ghobj(obj);
91 ghobj.shard_id = shard_id_t(0);
92 ghobj.generation = 0;
93 ghobj.hobj.snap = 0;
94 return lfn_generate_object_name_current(ghobj).size();
95 }
96
97 int LFNIndex::init()
98 {
99 return _init();
100 }
101
102 int LFNIndex::created(const ghobject_t &oid, const char *path)
103 {
104 WRAP_RETRY(
105 vector<string> path_comp;
106 string short_name;
107 r = decompose_full_path(path, &path_comp, 0, &short_name);
108 if (r < 0)
109 goto out;
110 r = lfn_created(path_comp, oid, short_name);
111 if (r < 0) {
112 if (failed) {
113 /* This is hacky, but the only way we get ENOENT from lfn_created here is
114 * if we did a failure injection in _created below AND actually started the
115 * split or merge. In that case, lfn_created already suceeded, and
116 * WRAP_RETRY already cleaned it up and we are actually done. In a real
117 * failure, the filestore itself would have ended up calling this with
118 * the new path, not the old one, so we'd find it.
119 */
120 r = 0;
121 }
122 goto out;
123 }
124 r = _created(path_comp, oid, short_name);
125 if (r < 0)
126 goto out;
127 );
128 }
129
130 int LFNIndex::unlink(const ghobject_t &oid)
131 {
132 WRAP_RETRY(
133 vector<string> path;
134 string short_name;
135 r = _lookup(oid, &path, &short_name, NULL);
136 if (r < 0) {
137 goto out;
138 }
139 r = _remove(path, oid, short_name);
140 if (r < 0) {
141 goto out;
142 }
143 );
144 }
145
146 int LFNIndex::lookup(const ghobject_t &oid,
147 IndexedPath *out_path,
148 int *hardlink)
149 {
150 WRAP_RETRY(
151 vector<string> path;
152 string short_name;
153 r = _lookup(oid, &path, &short_name, hardlink);
154 if (r < 0)
155 goto out;
156 string full_path = get_full_path(path, short_name);
157 *out_path = std::make_shared<Path>(full_path, this);
158 r = 0;
159 );
160 }
161
162 int LFNIndex::pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs)
163 {
164 return _pre_hash_collection(pg_num, expected_num_objs);
165 }
166
167
168 int LFNIndex::collection_list_partial(const ghobject_t &start,
169 const ghobject_t &end,
170 int max_count,
171 vector<ghobject_t> *ls,
172 ghobject_t *next)
173 {
174 return _collection_list_partial(start, end, max_count, ls, next);
175 }
176
177 /* Derived class utility methods */
178
179 int LFNIndex::fsync_dir(const vector<string> &path)
180 {
181 maybe_inject_failure();
182 int fd = ::open(get_full_path_subdir(path).c_str(), O_RDONLY|O_CLOEXEC);
183 if (fd < 0)
184 return -errno;
185 FDCloser f(fd);
186 maybe_inject_failure();
187 int r = ::fsync(fd);
188 maybe_inject_failure();
189 if (r < 0) {
190 derr << __func__ << " fsync failed: " << cpp_strerror(errno) << dendl;
191 ceph_abort();
192 }
193 return 0;
194 }
195
196 int LFNIndex::link_object(const vector<string> &from,
197 const vector<string> &to,
198 const ghobject_t &oid,
199 const string &from_short_name)
200 {
201 int r;
202 string from_path = get_full_path(from, from_short_name);
203 string to_path;
204 maybe_inject_failure();
205 r = lfn_get_name(to, oid, 0, &to_path, 0);
206 if (r < 0)
207 return r;
208 maybe_inject_failure();
209 r = ::link(from_path.c_str(), to_path.c_str());
210 maybe_inject_failure();
211 if (r < 0)
212 return -errno;
213 else
214 return 0;
215 }
216
217 int LFNIndex::remove_objects(const vector<string> &dir,
218 const map<string, ghobject_t> &to_remove,
219 map<string, ghobject_t> *remaining)
220 {
221 set<string> clean_chains;
222 for (map<string, ghobject_t>::const_iterator to_clean = to_remove.begin();
223 to_clean != to_remove.end();
224 ++to_clean) {
225 if (!lfn_is_hashed_filename(to_clean->first)) {
226 maybe_inject_failure();
227 int r = ::unlink(get_full_path(dir, to_clean->first).c_str());
228 maybe_inject_failure();
229 if (r < 0)
230 return -errno;
231 continue;
232 }
233 if (clean_chains.count(lfn_get_short_name(to_clean->second, 0)))
234 continue;
235 set<int> holes;
236 map<int, pair<string, ghobject_t> > chain;
237 for (int i = 0; ; ++i) {
238 string short_name = lfn_get_short_name(to_clean->second, i);
239 if (remaining->count(short_name)) {
240 chain[i] = *(remaining->find(short_name));
241 } else if (to_remove.count(short_name)) {
242 holes.insert(i);
243 } else {
244 break;
245 }
246 }
247
248 map<int, pair<string, ghobject_t > >::reverse_iterator candidate = chain.rbegin();
249 for (set<int>::iterator i = holes.begin();
250 i != holes.end();
251 ++i) {
252 if (candidate == chain.rend() || *i > candidate->first) {
253 string remove_path_name =
254 get_full_path(dir, lfn_get_short_name(to_clean->second, *i));
255 maybe_inject_failure();
256 int r = ::unlink(remove_path_name.c_str());
257 maybe_inject_failure();
258 if (r < 0)
259 return -errno;
260 continue;
261 }
262 string from = get_full_path(dir, candidate->second.first);
263 string to = get_full_path(dir, lfn_get_short_name(candidate->second.second, *i));
264 maybe_inject_failure();
265 int r = ::rename(from.c_str(), to.c_str());
266 maybe_inject_failure();
267 if (r < 0)
268 return -errno;
269 remaining->erase(candidate->second.first);
270 remaining->insert(pair<string, ghobject_t>(
271 lfn_get_short_name(candidate->second.second, *i),
272 candidate->second.second));
273 ++candidate;
274 }
275 if (!holes.empty())
276 clean_chains.insert(lfn_get_short_name(to_clean->second, 0));
277 }
278 return 0;
279 }
280
281 int LFNIndex::move_objects(const vector<string> &from,
282 const vector<string> &to)
283 {
284 map<string, ghobject_t> to_move;
285 int r;
286 r = list_objects(from, 0, NULL, &to_move);
287 if (r < 0)
288 return r;
289 for (map<string,ghobject_t>::iterator i = to_move.begin();
290 i != to_move.end();
291 ++i) {
292 string from_path = get_full_path(from, i->first);
293 string to_path, to_name;
294 r = lfn_get_name(to, i->second, &to_name, &to_path, 0);
295 if (r < 0)
296 return r;
297 maybe_inject_failure();
298 r = ::link(from_path.c_str(), to_path.c_str());
299 if (r < 0 && errno != EEXIST)
300 return -errno;
301 maybe_inject_failure();
302 r = lfn_created(to, i->second, to_name);
303 maybe_inject_failure();
304 if (r < 0)
305 return r;
306 }
307 r = fsync_dir(to);
308 if (r < 0)
309 return r;
310 for (map<string,ghobject_t>::iterator i = to_move.begin();
311 i != to_move.end();
312 ++i) {
313 maybe_inject_failure();
314 r = ::unlink(get_full_path(from, i->first).c_str());
315 maybe_inject_failure();
316 if (r < 0)
317 return -errno;
318 }
319 return fsync_dir(from);
320 }
321
322 int LFNIndex::remove_object(const vector<string> &from,
323 const ghobject_t &oid)
324 {
325 string short_name;
326 int r, exist;
327 maybe_inject_failure();
328 r = get_mangled_name(from, oid, &short_name, &exist);
329 maybe_inject_failure();
330 if (r < 0)
331 return r;
332 if (exist == 0)
333 return -ENOENT;
334 return lfn_unlink(from, oid, short_name);
335 }
336
337 int LFNIndex::get_mangled_name(const vector<string> &from,
338 const ghobject_t &oid,
339 string *mangled_name, int *hardlink)
340 {
341 return lfn_get_name(from, oid, mangled_name, 0, hardlink);
342 }
343
344 int LFNIndex::move_subdir(
345 LFNIndex &from,
346 LFNIndex &dest,
347 const vector<string> &path,
348 string dir
349 )
350 {
351 vector<string> sub_path(path.begin(), path.end());
352 sub_path.push_back(dir);
353 string from_path(from.get_full_path_subdir(sub_path));
354 string to_path(dest.get_full_path_subdir(sub_path));
355 int r = ::rename(from_path.c_str(), to_path.c_str());
356 if (r < 0)
357 return -errno;
358 return 0;
359 }
360
361 int LFNIndex::move_object(
362 LFNIndex &from,
363 LFNIndex &dest,
364 const vector<string> &path,
365 const pair<string, ghobject_t> &obj
366 )
367 {
368 string from_path(from.get_full_path(path, obj.first));
369 string to_path;
370 string to_name;
371 int exists;
372 int r = dest.lfn_get_name(path, obj.second, &to_name, &to_path, &exists);
373 if (r < 0)
374 return r;
375 if (!exists) {
376 r = ::link(from_path.c_str(), to_path.c_str());
377 if (r < 0)
378 return r;
379 }
380 r = dest.lfn_created(path, obj.second, to_name);
381 if (r < 0)
382 return r;
383 r = dest.fsync_dir(path);
384 if (r < 0)
385 return r;
386 r = from.remove_object(path, obj.second);
387 if (r < 0)
388 return r;
389 return from.fsync_dir(path);
390 }
391
392
393 static int get_hobject_from_oinfo(const char *dir, const char *file,
394 ghobject_t *o)
395 {
396 char path[PATH_MAX];
397 snprintf(path, sizeof(path), "%s/%s", dir, file);
398 // Hack, user.ceph._ is the attribute used to store the object info
399 bufferptr bp;
400 int r = chain_getxattr_buf(
401 path,
402 "user.ceph._",
403 &bp);
404 if (r < 0)
405 return r;
406 bufferlist bl;
407 if (r > 0)
408 bl.push_back(bp);
409 object_info_t oi(bl);
410 *o = ghobject_t(oi.soid);
411 return 0;
412 }
413
414
415 int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
416 long *handle, map<string, ghobject_t> *out)
417 {
418 string to_list_path = get_full_path_subdir(to_list);
419 DIR *dir = ::opendir(to_list_path.c_str());
420 if (!dir) {
421 return -errno;
422 }
423
424 if (handle && *handle) {
425 seekdir(dir, *handle);
426 }
427
428 struct dirent *de = nullptr;
429 int r = 0;
430 int listed = 0;
431 bool end = true;
432 while (true) {
433 errno = 0;
434 de = ::readdir(dir);
435 if (de == nullptr) {
436 if (errno != 0) {
437 r = -errno;
438 dout(0) << "readdir failed " << to_list_path << ": "
439 << cpp_strerror(-r) << dendl;
440 goto cleanup;
441 }
442 break;
443 }
444 end = false;
445 if (max_objs > 0 && listed >= max_objs) {
446 break;
447 }
448 if (de->d_name[0] == '.')
449 continue;
450 string short_name(de->d_name);
451 ghobject_t obj;
452 if (lfn_is_object(short_name)) {
453 r = lfn_translate(to_list, short_name, &obj);
454 if (r == -EINVAL) {
455 continue;
456 } else if (r < 0) {
457 goto cleanup;
458 } else {
459 string long_name = lfn_generate_object_name(obj);
460 if (!lfn_must_hash(long_name)) {
461 ceph_assert(long_name == short_name);
462 }
463 if (index_version == HASH_INDEX_TAG)
464 get_hobject_from_oinfo(to_list_path.c_str(), short_name.c_str(), &obj);
465
466 out->insert(pair<string, ghobject_t>(short_name, obj));
467 ++listed;
468 }
469 }
470 }
471
472 if (handle && !end) {
473 *handle = telldir(dir);
474 }
475
476 r = 0;
477 cleanup:
478 ::closedir(dir);
479 return r;
480 }
481
482 int LFNIndex::list_subdirs(const vector<string> &to_list,
483 vector<string> *out)
484 {
485 string to_list_path = get_full_path_subdir(to_list);
486 DIR *dir = ::opendir(to_list_path.c_str());
487 if (!dir)
488 return -errno;
489
490 struct dirent *de = nullptr;
491 int r = 0;
492 while (true) {
493 errno = 0;
494 de = ::readdir(dir);
495 if (de == nullptr) {
496 if (errno != 0) {
497 r = -errno;
498 dout(0) << "readdir failed " << to_list_path << ": "
499 << cpp_strerror(-r) << dendl;
500 }
501 break;
502 }
503 string short_name(de->d_name);
504 string demangled_name;
505 if (lfn_is_subdir(short_name, &demangled_name)) {
506 out->push_back(demangled_name);
507 }
508 }
509
510 ::closedir(dir);
511 return r;
512 }
513
514 int LFNIndex::create_path(const vector<string> &to_create)
515 {
516 maybe_inject_failure();
517 int r = ::mkdir(get_full_path_subdir(to_create).c_str(), 0777);
518 maybe_inject_failure();
519 if (r < 0)
520 return -errno;
521 else
522 return 0;
523 }
524
525 int LFNIndex::remove_path(const vector<string> &to_remove)
526 {
527 maybe_inject_failure();
528 int r = ::rmdir(get_full_path_subdir(to_remove).c_str());
529 maybe_inject_failure();
530 if (r < 0)
531 return -errno;
532 else
533 return 0;
534 }
535
536 int LFNIndex::path_exists(const vector<string> &to_check, int *exists)
537 {
538 string full_path = get_full_path_subdir(to_check);
539 struct stat buf;
540 if (::stat(full_path.c_str(), &buf)) {
541 int r = -errno;
542 if (r == -ENOENT) {
543 *exists = 0;
544 return 0;
545 } else {
546 return r;
547 }
548 } else {
549 *exists = 1;
550 return 0;
551 }
552 }
553
554 int LFNIndex::add_attr_path(const vector<string> &path,
555 const string &attr_name,
556 bufferlist &attr_value)
557 {
558 string full_path = get_full_path_subdir(path);
559 maybe_inject_failure();
560 return chain_setxattr<false, true>(
561 full_path.c_str(), mangle_attr_name(attr_name).c_str(),
562 reinterpret_cast<void *>(attr_value.c_str()),
563 attr_value.length());
564 }
565
566 int LFNIndex::get_attr_path(const vector<string> &path,
567 const string &attr_name,
568 bufferlist &attr_value)
569 {
570 string full_path = get_full_path_subdir(path);
571 bufferptr bp;
572 int r = chain_getxattr_buf(
573 full_path.c_str(),
574 mangle_attr_name(attr_name).c_str(),
575 &bp);
576 if (r > 0)
577 attr_value.push_back(bp);
578 return r;
579 }
580
581 int LFNIndex::remove_attr_path(const vector<string> &path,
582 const string &attr_name)
583 {
584 string full_path = get_full_path_subdir(path);
585 string mangled_attr_name = mangle_attr_name(attr_name);
586 maybe_inject_failure();
587 return chain_removexattr(full_path.c_str(), mangled_attr_name.c_str());
588 }
589
590 string LFNIndex::lfn_generate_object_name_keyless(const ghobject_t &oid)
591 {
592 char s[FILENAME_MAX_LEN];
593 char *end = s + sizeof(s);
594 char *t = s;
595
596 ceph_assert(oid.generation == ghobject_t::NO_GEN);
597 const char *i = oid.hobj.oid.name.c_str();
598 // Escape subdir prefix
599 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
600 *t++ = '\\';
601 *t++ = 'd';
602 i += 4;
603 }
604 while (*i && t < end) {
605 if (*i == '\\') {
606 *t++ = '\\';
607 *t++ = '\\';
608 } else if (*i == '.' && i == oid.hobj.oid.name.c_str()) { // only escape leading .
609 *t++ = '\\';
610 *t++ = '.';
611 } else if (*i == '/') {
612 *t++ = '\\';
613 *t++ = 's';
614 } else
615 *t++ = *i;
616 i++;
617 }
618
619 if (oid.hobj.snap == CEPH_NOSNAP)
620 t += snprintf(t, end - t, "_head");
621 else if (oid.hobj.snap == CEPH_SNAPDIR)
622 t += snprintf(t, end - t, "_snapdir");
623 else
624 t += snprintf(t, end - t, "_%llx", (long long unsigned)oid.hobj.snap);
625 snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
626
627 return string(s);
628 }
629
// Appends the characters of [begin, end) to *out, replacing '\' with "\\",
// '/' with "\s", '_' with "\u" and the NUL byte with "\n" (a backslash
// followed by the letter n).
static void append_escaped(string::const_iterator begin,
			   string::const_iterator end,
			   string *out)
{
  for (; begin != end; ++begin) {
    switch (*begin) {
    case '\\':
      out->append("\\\\");
      break;
    case '/':
      out->append("\\s");
      break;
    case '_':
      out->append("\\u");
      break;
    case '\0':
      out->append("\\n");
      break;
    default:
      out->push_back(*begin);
      break;
    }
  }
}
648
649 string LFNIndex::lfn_generate_object_name_current(const ghobject_t &oid)
650 {
651 string full_name;
652 string::const_iterator i = oid.hobj.oid.name.begin();
653 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
654 full_name.append("\\d");
655 i += 4;
656 } else if (oid.hobj.oid.name[0] == '.') {
657 full_name.append("\\.");
658 ++i;
659 }
660 append_escaped(i, oid.hobj.oid.name.end(), &full_name);
661 full_name.append("_");
662 append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name);
663 full_name.append("_");
664
665 char buf[PATH_MAX];
666 char *t = buf;
667 const char *end = t + sizeof(buf);
668 if (oid.hobj.snap == CEPH_NOSNAP)
669 t += snprintf(t, end - t, "head");
670 else if (oid.hobj.snap == CEPH_SNAPDIR)
671 t += snprintf(t, end - t, "snapdir");
672 else
673 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap);
674 t += snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
675 full_name.append(buf, t);
676 full_name.append("_");
677
678 append_escaped(oid.hobj.nspace.begin(), oid.hobj.nspace.end(), &full_name);
679 full_name.append("_");
680
681 t = buf;
682 if (oid.hobj.pool == -1)
683 t += snprintf(t, end - t, "none");
684 else
685 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.pool);
686 full_name.append(buf, t);
687
688 if (oid.generation != ghobject_t::NO_GEN ||
689 oid.shard_id != shard_id_t::NO_SHARD) {
690 full_name.append("_");
691
692 t = buf;
693 t += snprintf(t, end - buf, "%llx", (long long unsigned)oid.generation);
694 full_name.append(buf, t);
695
696 full_name.append("_");
697
698 t = buf;
699 t += snprintf(t, end - buf, "%x", (int)oid.shard_id);
700 full_name.append(buf, t);
701 }
702
703 return full_name;
704 }
705
706 string LFNIndex::lfn_generate_object_name_poolless(const ghobject_t &oid)
707 {
708 if (index_version == HASH_INDEX_TAG)
709 return lfn_generate_object_name_keyless(oid);
710
711 ceph_assert(oid.generation == ghobject_t::NO_GEN);
712 string full_name;
713 string::const_iterator i = oid.hobj.oid.name.begin();
714 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
715 full_name.append("\\d");
716 i += 4;
717 } else if (oid.hobj.oid.name[0] == '.') {
718 full_name.append("\\.");
719 ++i;
720 }
721 append_escaped(i, oid.hobj.oid.name.end(), &full_name);
722 full_name.append("_");
723 append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name);
724 full_name.append("_");
725
726 char snap_with_hash[PATH_MAX];
727 char *t = snap_with_hash;
728 char *end = t + sizeof(snap_with_hash);
729 if (oid.hobj.snap == CEPH_NOSNAP)
730 t += snprintf(t, end - t, "head");
731 else if (oid.hobj.snap == CEPH_SNAPDIR)
732 t += snprintf(t, end - t, "snapdir");
733 else
734 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap);
735 snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
736 full_name += string(snap_with_hash);
737 return full_name;
738 }
739
740 int LFNIndex::lfn_get_name(const vector<string> &path,
741 const ghobject_t &oid,
742 string *mangled_name, string *out_path,
743 int *hardlink)
744 {
745 string full_name = lfn_generate_object_name(oid);
746 int r;
747
748 if (!lfn_must_hash(full_name)) {
749 if (mangled_name)
750 *mangled_name = full_name;
751 if (out_path)
752 *out_path = get_full_path(path, full_name);
753 if (hardlink) {
754 struct stat buf;
755 string full_path = get_full_path(path, full_name);
756 maybe_inject_failure();
757 r = ::stat(full_path.c_str(), &buf);
758 if (r < 0) {
759 if (errno == ENOENT)
760 *hardlink = 0;
761 else
762 return -errno;
763 } else {
764 *hardlink = buf.st_nlink;
765 }
766 }
767 return 0;
768 }
769
770 int i = 0;
771 string candidate;
772 string candidate_path;
773 for ( ; ; ++i) {
774 candidate = lfn_get_short_name(oid, i);
775 candidate_path = get_full_path(path, candidate);
776 bufferptr bp;
777 r = chain_getxattr_buf(
778 candidate_path.c_str(),
779 get_lfn_attr().c_str(),
780 &bp);
781 if (r < 0) {
782 if (errno != ENODATA && errno != ENOENT)
783 return -errno;
784 if (errno == ENODATA) {
785 // Left over from incomplete transaction, it'll be replayed
786 maybe_inject_failure();
787 r = ::unlink(candidate_path.c_str());
788 maybe_inject_failure();
789 if (r < 0)
790 return -errno;
791 }
792 if (mangled_name)
793 *mangled_name = candidate;
794 if (out_path)
795 *out_path = candidate_path;
796 if (hardlink)
797 *hardlink = 0;
798 return 0;
799 }
800 ceph_assert(r > 0);
801 string lfn(bp.c_str(), bp.length());
802 if (lfn == full_name) {
803 if (mangled_name)
804 *mangled_name = candidate;
805 if (out_path)
806 *out_path = candidate_path;
807 if (hardlink) {
808 struct stat st;
809 r = ::stat(candidate_path.c_str(), &st);
810 if (r < 0) {
811 if (errno == ENOENT)
812 *hardlink = 0;
813 else
814 return -errno;
815 } else {
816 *hardlink = st.st_nlink;
817 }
818 }
819 return 0;
820 }
821 bp = bufferptr();
822 r = chain_getxattr_buf(
823 candidate_path.c_str(),
824 get_alt_lfn_attr().c_str(),
825 &bp);
826 if (r > 0) {
827 // only consider alt name if nlink > 1
828 struct stat st;
829 int rc = ::stat(candidate_path.c_str(), &st);
830 if (rc < 0)
831 return -errno;
832 if (st.st_nlink <= 1) {
833 // left over from incomplete unlink, remove
834 maybe_inject_failure();
835 dout(20) << __func__ << " found extra alt attr for " << candidate_path
836 << ", long name " << string(bp.c_str(), bp.length()) << dendl;
837 rc = chain_removexattr(candidate_path.c_str(),
838 get_alt_lfn_attr().c_str());
839 maybe_inject_failure();
840 if (rc < 0)
841 return rc;
842 continue;
843 }
844 string lfn(bp.c_str(), bp.length());
845 if (lfn == full_name) {
846 dout(20) << __func__ << " used alt attr for " << full_name << dendl;
847 if (mangled_name)
848 *mangled_name = candidate;
849 if (out_path)
850 *out_path = candidate_path;
851 if (hardlink)
852 *hardlink = st.st_nlink;
853 return 0;
854 }
855 }
856 }
857 ceph_abort(); // Unreachable
858 return 0;
859 }
860
861 int LFNIndex::lfn_created(const vector<string> &path,
862 const ghobject_t &oid,
863 const string &mangled_name)
864 {
865 if (!lfn_is_hashed_filename(mangled_name))
866 return 0;
867 string full_path = get_full_path(path, mangled_name);
868 string full_name = lfn_generate_object_name(oid);
869 maybe_inject_failure();
870
871 // if the main attr exists and is different, move it to the alt attr.
872 bufferptr bp;
873 int r = chain_getxattr_buf(
874 full_path.c_str(),
875 get_lfn_attr().c_str(),
876 &bp);
877 if (r > 0) {
878 string lfn(bp.c_str(), bp.length());
879 if (lfn != full_name) {
880 dout(20) << __func__ << " " << mangled_name
881 << " moving old name to alt attr "
882 << lfn
883 << ", new name is " << full_name << dendl;
884 r = chain_setxattr<false, true>(
885 full_path.c_str(), get_alt_lfn_attr().c_str(),
886 bp.c_str(), bp.length());
887 if (r < 0)
888 return r;
889 }
890 }
891
892 return chain_setxattr<false, true>(
893 full_path.c_str(), get_lfn_attr().c_str(),
894 full_name.c_str(), full_name.size());
895 }
896
897 int LFNIndex::lfn_unlink(const vector<string> &path,
898 const ghobject_t &oid,
899 const string &mangled_name)
900 {
901 if (!lfn_is_hashed_filename(mangled_name)) {
902 string full_path = get_full_path(path, mangled_name);
903 maybe_inject_failure();
904 int r = ::unlink(full_path.c_str());
905 maybe_inject_failure();
906 if (r < 0)
907 return -errno;
908 return 0;
909 }
910
911 int i = 0;
912 for ( ; ; ++i) {
913 string candidate = lfn_get_short_name(oid, i);
914 if (candidate == mangled_name)
915 break;
916 }
917 int removed_index = i;
918 ++i;
919 for ( ; ; ++i) {
920 struct stat buf;
921 string to_check = lfn_get_short_name(oid, i);
922 string to_check_path = get_full_path(path, to_check);
923 int r = ::stat(to_check_path.c_str(), &buf);
924 if (r < 0) {
925 if (errno == ENOENT) {
926 break;
927 } else {
928 return -errno;
929 }
930 }
931 }
932 string full_path = get_full_path(path, mangled_name);
933 int fd = ::open(full_path.c_str(), O_RDONLY|O_CLOEXEC);
934 if (fd < 0)
935 return -errno;
936 FDCloser f(fd);
937 if (i == removed_index + 1) {
938 maybe_inject_failure();
939 int r = ::unlink(full_path.c_str());
940 maybe_inject_failure();
941 if (r < 0)
942 return -errno;
943 } else {
944 string& rename_to = full_path;
945 string rename_from = get_full_path(path, lfn_get_short_name(oid, i - 1));
946 maybe_inject_failure();
947 int r = ::rename(rename_from.c_str(), rename_to.c_str());
948 maybe_inject_failure();
949 if (r < 0)
950 return -errno;
951 }
952 struct stat st;
953 int r = ::fstat(fd, &st);
954 if (r == 0 && st.st_nlink > 0) {
955 // remove alt attr
956 dout(20) << __func__ << " removing alt attr from " << full_path << dendl;
957 fsync_dir(path);
958 chain_fremovexattr(fd, get_alt_lfn_attr().c_str());
959 }
960 return r;
961 }
962
963 int LFNIndex::lfn_translate(const vector<string> &path,
964 const string &short_name,
965 ghobject_t *out)
966 {
967 if (!lfn_is_hashed_filename(short_name)) {
968 return lfn_parse_object_name(short_name, out);
969 }
970 string full_path = get_full_path(path, short_name);
971 // First, check alt attr
972 bufferptr bp;
973 int r = chain_getxattr_buf(
974 full_path.c_str(),
975 get_alt_lfn_attr().c_str(),
976 &bp);
977 if (r > 0) {
978 // There is an alt attr, does it match?
979 string lfn(bp.c_str(), bp.length());
980 if (short_name_matches(short_name.c_str(), lfn.c_str())) {
981 return lfn_parse_object_name(lfn, out);
982 }
983 }
984
985 // Get lfn_attr
986 bp = bufferptr();
987 r = chain_getxattr_buf(
988 full_path.c_str(),
989 get_lfn_attr().c_str(),
990 &bp);
991 if (r < 0)
992 return r;
993 if (r == 0)
994 return -EINVAL;
995
996 string long_name(bp.c_str(), bp.length());
997 return lfn_parse_object_name(long_name, out);
998 }
999
1000 bool LFNIndex::lfn_is_object(const string &short_name)
1001 {
1002 return lfn_is_hashed_filename(short_name) || !lfn_is_subdir(short_name, 0);
1003 }
1004
1005 bool LFNIndex::lfn_is_subdir(const string &name, string *demangled)
1006 {
1007 if (name.substr(0, SUBDIR_PREFIX.size()) == SUBDIR_PREFIX) {
1008 if (demangled)
1009 *demangled = demangle_path_component(name);
1010 return 1;
1011 }
1012 return 0;
1013 }
1014
1015 static int parse_object(const char *s, ghobject_t& o)
1016 {
1017 const char *hash = s + strlen(s) - 1;
1018 while (*hash != '_' &&
1019 hash > s)
1020 hash--;
1021 const char *bar = hash - 1;
1022 while (*bar != '_' &&
1023 bar > s)
1024 bar--;
1025 if (*bar == '_') {
1026 char buf[bar-s + 1];
1027 char *t = buf;
1028 const char *i = s;
1029 while (i < bar) {
1030 if (*i == '\\') {
1031 i++;
1032 switch (*i) {
1033 case '\\': *t++ = '\\'; break;
1034 case '.': *t++ = '.'; break;
1035 case 's': *t++ = '/'; break;
1036 case 'd': {
1037 *t++ = 'D';
1038 *t++ = 'I';
1039 *t++ = 'R';
1040 *t++ = '_';
1041 break;
1042 }
1043 default: ceph_abort();
1044 }
1045 } else {
1046 *t++ = *i;
1047 }
1048 i++;
1049 }
1050 *t = 0;
1051 o.hobj.oid.name = string(buf, t-buf);
1052 if (strncmp(bar+1, "head", 4) == 0)
1053 o.hobj.snap = CEPH_NOSNAP;
1054 else if (strncmp(bar+1, "snapdir", 7) == 0)
1055 o.hobj.snap = CEPH_SNAPDIR;
1056 else
1057 o.hobj.snap = strtoull(bar+1, NULL, 16);
1058
1059 uint32_t hobject_hash_input;
1060 sscanf(hash, "_%X", &hobject_hash_input);
1061 o.hobj.set_hash(hobject_hash_input);
1062
1063 return 1;
1064 }
1065 return 0;
1066 }
1067
1068 int LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t *out)
1069 {
1070 int r = parse_object(long_name.c_str(), *out);
1071 int64_t pool = -1;
1072 spg_t pg;
1073 if (coll().is_pg_prefix(&pg))
1074 pool = (int64_t)pg.pgid.pool();
1075 out->hobj.pool = pool;
1076 if (!r) return -EINVAL;
1077 string temp = lfn_generate_object_name(*out);
1078 return 0;
1079 }
1080
// Appends the characters of [begin, end) to *out, undoing the escaping done
// when names are generated: "\\" -> '\', "\s" -> '/', "\n" -> NUL byte and
// "\u" -> '_'.
//
// Returns false on an unknown escape or on a backslash that is the last
// character of the range; *out may then hold a partial result.  (A trailing
// backslash used to make the loop read the character at `end` and then step
// past it.)
static bool append_unescaped(string::const_iterator begin,
			     string::const_iterator end,
			     string *out)
{
  for (string::const_iterator i = begin; i != end; ++i) {
    if (*i != '\\') {
      out->push_back(*i);
      continue;
    }
    if (++i == end)
      return false;  // dangling backslash: nothing left to unescape
    switch (*i) {
    case '\\':
      out->push_back('\\');
      break;
    case 's':
      out->push_back('/');
      break;
    case 'n':
      out->push_back('\0');
      break;
    case 'u':
      out->push_back('_');
      break;
    default:
      return false;
    }
  }
  return true;
}
1104
1105 int LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
1106 ghobject_t *out)
1107 {
1108 string name;
1109 string key;
1110 uint32_t hash;
1111 snapid_t snap;
1112
1113 string::const_iterator current = long_name.begin();
1114 if (*current == '\\') {
1115 ++current;
1116 if (current == long_name.end()) {
1117 return -EINVAL;
1118 } else if (*current == 'd') {
1119 name.append("DIR_");
1120 ++current;
1121 } else if (*current == '.') {
1122 name.append(".");
1123 ++current;
1124 } else {
1125 --current;
1126 }
1127 }
1128
1129 string::const_iterator end = current;
1130 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1131 if (end == long_name.end())
1132 return -EINVAL;
1133 if (!append_unescaped(current, end, &name))
1134 return -EINVAL;
1135
1136 current = ++end;
1137 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1138 if (end == long_name.end())
1139 return -EINVAL;
1140 if (!append_unescaped(current, end, &key))
1141 return -EINVAL;
1142
1143 current = ++end;
1144 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1145 if (end == long_name.end())
1146 return -EINVAL;
1147 string snap_str(current, end);
1148
1149 current = ++end;
1150 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1151 if (end != long_name.end())
1152 return -EINVAL;
1153 string hash_str(current, end);
1154
1155 if (snap_str == "head")
1156 snap = CEPH_NOSNAP;
1157 else if (snap_str == "snapdir")
1158 snap = CEPH_SNAPDIR;
1159 else
1160 snap = strtoull(snap_str.c_str(), NULL, 16);
1161 sscanf(hash_str.c_str(), "%X", &hash);
1162
1163
1164 int64_t pool = -1;
1165 spg_t pg;
1166 if (coll().is_pg_prefix(&pg))
1167 pool = (int64_t)pg.pgid.pool();
1168 (*out) = ghobject_t(hobject_t(name, key, snap, hash, pool, ""));
1169 return 0;
1170 }
1171
1172
1173 int LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
1174 {
1175 string name;
1176 string key;
1177 string ns;
1178 uint32_t hash;
1179 snapid_t snap;
1180 uint64_t pool;
1181 gen_t generation = ghobject_t::NO_GEN;
1182 shard_id_t shard_id = shard_id_t::NO_SHARD;
1183
1184 if (index_version == HASH_INDEX_TAG)
1185 return lfn_parse_object_name_keyless(long_name, out);
1186 if (index_version == HASH_INDEX_TAG_2)
1187 return lfn_parse_object_name_poolless(long_name, out);
1188
1189 string::const_iterator current = long_name.begin();
1190 if (*current == '\\') {
1191 ++current;
1192 if (current == long_name.end()) {
1193 return -EINVAL;
1194 } else if (*current == 'd') {
1195 name.append("DIR_");
1196 ++current;
1197 } else if (*current == '.') {
1198 name.append(".");
1199 ++current;
1200 } else {
1201 --current;
1202 }
1203 }
1204
1205 string::const_iterator end = current;
1206 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1207 if (end == long_name.end())
1208 return -EINVAL;
1209 if (!append_unescaped(current, end, &name))
1210 return -EINVAL;
1211
1212 current = ++end;
1213 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1214 if (end == long_name.end())
1215 return -EINVAL;
1216 if (!append_unescaped(current, end, &key))
1217 return -EINVAL;
1218
1219 current = ++end;
1220 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1221 if (end == long_name.end())
1222 return -EINVAL;
1223 string snap_str(current, end);
1224
1225 current = ++end;
1226 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1227 if (end == long_name.end())
1228 return -EINVAL;
1229 string hash_str(current, end);
1230
1231 current = ++end;
1232 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1233 if (end == long_name.end())
1234 return -EINVAL;
1235 if (!append_unescaped(current, end, &ns))
1236 return -EINVAL;
1237
1238 current = ++end;
1239 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1240 string pstring(current, end);
1241
1242 // Optional generation/shard_id
1243 string genstring, shardstring;
1244 if (end != long_name.end()) {
1245 current = ++end;
1246 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1247 if (end == long_name.end())
1248 return -EINVAL;
1249 genstring = string(current, end);
1250
1251 generation = (gen_t)strtoull(genstring.c_str(), NULL, 16);
1252
1253 current = ++end;
1254 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1255 if (end != long_name.end())
1256 return -EINVAL;
1257 shardstring = string(current, end);
1258
1259 shard_id = (shard_id_t)strtoul(shardstring.c_str(), NULL, 16);
1260 }
1261
1262 if (snap_str == "head")
1263 snap = CEPH_NOSNAP;
1264 else if (snap_str == "snapdir")
1265 snap = CEPH_SNAPDIR;
1266 else
1267 snap = strtoull(snap_str.c_str(), NULL, 16);
1268 sscanf(hash_str.c_str(), "%X", &hash);
1269
1270 if (pstring == "none")
1271 pool = (uint64_t)-1;
1272 else
1273 pool = strtoull(pstring.c_str(), NULL, 16);
1274
1275 (*out) = ghobject_t(hobject_t(name, key, snap, hash, (int64_t)pool, ns), generation, shard_id);
1276 return 0;
1277 }
1278
1279 bool LFNIndex::lfn_is_hashed_filename(const string &name)
1280 {
1281 if (name.size() < (unsigned)FILENAME_SHORT_LEN) {
1282 return 0;
1283 }
1284 if (name.substr(name.size() - FILENAME_COOKIE.size(), FILENAME_COOKIE.size())
1285 == FILENAME_COOKIE) {
1286 return 1;
1287 } else {
1288 return 0;
1289 }
1290 }
1291
1292 bool LFNIndex::lfn_must_hash(const string &long_name)
1293 {
1294 return (int)long_name.size() >= FILENAME_SHORT_LEN;
1295 }
1296
/**
 * Render len bytes of buf as lowercase hexadecimal into str.
 *
 * str must have room for 2 * len characters plus the terminating NUL.
 * With len <= 0 only the terminator is written.
 */
static inline void buf_to_hex(const unsigned char *buf, int len, char *str)
{
  static const char digits[] = "0123456789abcdef";

  char *cursor = str;
  for (int idx = 0; idx < len; ++idx) {
    const unsigned char byte = buf[idx];
    *cursor++ = digits[byte >> 4];	// high nibble first
    *cursor++ = digits[byte & 0x0f];
  }
  *cursor = '\0';
}
1305
1306 int LFNIndex::hash_filename(const char *filename, char *hash, int buf_len)
1307 {
1308 if (buf_len < FILENAME_HASH_LEN + 1)
1309 return -EINVAL;
1310
1311 char buf[FILENAME_LFN_DIGEST_SIZE];
1312 char hex[FILENAME_LFN_DIGEST_SIZE * 2];
1313
1314 SHA1 h;
1315 h.Update((const unsigned char *)filename, strlen(filename));
1316 h.Final((unsigned char *)buf);
1317
1318 buf_to_hex((unsigned char *)buf, (FILENAME_HASH_LEN + 1) / 2, hex);
1319 strncpy(hash, hex, FILENAME_HASH_LEN);
1320 hash[FILENAME_HASH_LEN] = '\0';
1321 return 0;
1322 }
1323
1324 void LFNIndex::build_filename(const char *old_filename, int i, char *filename, int len)
1325 {
1326 char hash[FILENAME_HASH_LEN + 1];
1327
1328 ceph_assert(len >= FILENAME_SHORT_LEN + 4);
1329
1330 strncpy(filename, old_filename, FILENAME_PREFIX_LEN);
1331 filename[FILENAME_PREFIX_LEN] = '\0';
1332 if ((int)strlen(filename) < FILENAME_PREFIX_LEN)
1333 return;
1334 if (old_filename[FILENAME_PREFIX_LEN] == '\0')
1335 return;
1336
1337 hash_filename(old_filename, hash, sizeof(hash));
1338 int ofs = FILENAME_PREFIX_LEN;
1339 while (1) {
1340 int suffix_len = sprintf(filename + ofs, "_%s_%d_%s", hash, i, FILENAME_COOKIE.c_str());
1341 if (ofs + suffix_len <= FILENAME_SHORT_LEN || !ofs)
1342 break;
1343 ofs--;
1344 }
1345 }
1346
1347 bool LFNIndex::short_name_matches(const char *short_name, const char *cand_long_name)
1348 {
1349 const char *end = short_name;
1350 while (*end) ++end;
1351 const char *suffix = end;
1352 if (suffix > short_name) --suffix; // last char
1353 while (suffix > short_name && *suffix != '_') --suffix; // back to first _
1354 if (suffix > short_name) --suffix; // one behind that
1355 while (suffix > short_name && *suffix != '_') --suffix; // back to second _
1356
1357 int index = -1;
1358 char buf[FILENAME_SHORT_LEN + 4];
1359 ceph_assert((end - suffix) < (int)sizeof(buf));
1360 int r = sscanf(suffix, "_%d_%s", &index, buf);
1361 if (r < 2)
1362 return false;
1363 if (strcmp(buf, FILENAME_COOKIE.c_str()) != 0)
1364 return false;
1365 build_filename(cand_long_name, index, buf, sizeof(buf));
1366 return strcmp(short_name, buf) == 0;
1367 }
1368
1369 string LFNIndex::lfn_get_short_name(const ghobject_t &oid, int i)
1370 {
1371 string long_name = lfn_generate_object_name(oid);
1372 ceph_assert(lfn_must_hash(long_name));
1373 char buf[FILENAME_SHORT_LEN + 4];
1374 build_filename(long_name.c_str(), i, buf, sizeof(buf));
1375 return string(buf);
1376 }
1377
1378 const string &LFNIndex::get_base_path()
1379 {
1380 return base_path;
1381 }
1382
1383 string LFNIndex::get_full_path_subdir(const vector<string> &rel)
1384 {
1385 string retval = get_base_path();
1386 for (vector<string>::const_iterator i = rel.begin();
1387 i != rel.end();
1388 ++i) {
1389 retval += "/";
1390 retval += mangle_path_component(*i);
1391 }
1392 return retval;
1393 }
1394
1395 string LFNIndex::get_full_path(const vector<string> &rel, const string &name)
1396 {
1397 return get_full_path_subdir(rel) + "/" + name;
1398 }
1399
1400 string LFNIndex::mangle_path_component(const string &component)
1401 {
1402 return SUBDIR_PREFIX + component;
1403 }
1404
1405 string LFNIndex::demangle_path_component(const string &component)
1406 {
1407 return component.substr(SUBDIR_PREFIX.size(), component.size() - SUBDIR_PREFIX.size());
1408 }
1409
1410 int LFNIndex::decompose_full_path(const char *in, vector<string> *out,
1411 ghobject_t *oid, string *shortname)
1412 {
1413 const char *beginning = in + get_base_path().size();
1414 const char *end = beginning;
1415 while (1) {
1416 end++;
1417 beginning = end++;
1418 for ( ; *end != '\0' && *end != '/'; ++end) ;
1419 if (*end != '\0') {
1420 out->push_back(demangle_path_component(string(beginning, end - beginning)));
1421 continue;
1422 } else {
1423 break;
1424 }
1425 }
1426 *shortname = string(beginning, end - beginning);
1427 if (oid) {
1428 int r = lfn_translate(*out, *shortname, oid);
1429 if (r < 0)
1430 return r;
1431 }
1432 return 0;
1433 }
1434
1435 string LFNIndex::mangle_attr_name(const string &attr)
1436 {
1437 return PHASH_ATTR_PREFIX + attr;
1438 }