]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/filestore/LFNIndex.cc
import ceph 12.2.12
[ceph.git] / ceph / src / os / filestore / LFNIndex.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <string>
16#include <map>
17#include <set>
18#include <vector>
19#include <errno.h>
20#include <string.h>
21
22#if defined(__FreeBSD__)
23#include <sys/param.h>
24#endif
25
26#include "osd/osd_types.h"
27#include "include/object.h"
28#include "common/config.h"
29#include "common/debug.h"
30#include "include/buffer.h"
31#include "common/ceph_crypto.h"
a8e16298 32#include "common/errno.h"
7c673cae
FG
33#include "include/compat.h"
34#include "chain_xattr.h"
35
36#include "LFNIndex.h"
37using ceph::crypto::SHA1;
38
39#define dout_context cct
40#define dout_subsys ceph_subsys_filestore
41#undef dout_prefix
42#define dout_prefix *_dout << "LFNIndex(" << get_base_path() << ") "
43
44
45const string LFNIndex::LFN_ATTR = "user.cephos.lfn";
46const string LFNIndex::PHASH_ATTR_PREFIX = "user.cephos.phash.";
47const string LFNIndex::SUBDIR_PREFIX = "DIR_";
48const string LFNIndex::FILENAME_COOKIE = "long";
49const int LFNIndex::FILENAME_PREFIX_LEN = FILENAME_SHORT_LEN - FILENAME_HASH_LEN -
50 FILENAME_COOKIE.size() -
51 FILENAME_EXTRA;
52void LFNIndex::maybe_inject_failure()
53{
54 if (error_injection_enabled) {
55 if (current_failure > last_failure &&
56 (((double)(rand() % 10000))/((double)(10000))
57 < error_injection_probability)) {
58 last_failure = current_failure;
59 current_failure = 0;
60 throw RetryException();
61 }
62 ++current_failure;
63 }
64}
65
66// Helper to close fd's when we leave scope. This is useful when used
67// in combination with RetryException, thrown by the above.
68struct FDCloser {
69 int fd;
70 explicit FDCloser(int f) : fd(f) {}
71 ~FDCloser() {
72 VOID_TEMP_FAILURE_RETRY(::close(fd));
73 }
74};
75
76
77/* Public methods */
78
79uint64_t LFNIndex::get_max_escaped_name_len(const hobject_t &obj)
80{
81 ghobject_t ghobj(obj);
82 ghobj.shard_id = shard_id_t(0);
83 ghobj.generation = 0;
84 ghobj.hobj.snap = 0;
85 return lfn_generate_object_name_current(ghobj).size();
86}
87
88int LFNIndex::init()
89{
90 return _init();
91}
92
93int LFNIndex::created(const ghobject_t &oid, const char *path)
94{
95 WRAP_RETRY(
96 vector<string> path_comp;
97 string short_name;
98 r = decompose_full_path(path, &path_comp, 0, &short_name);
99 if (r < 0)
100 goto out;
101 r = lfn_created(path_comp, oid, short_name);
102 if (r < 0) {
103 if (failed) {
104 /* This is hacky, but the only way we get ENOENT from lfn_created here is
105 * if we did a failure injection in _created below AND actually started the
106 * split or merge. In that case, lfn_created already suceeded, and
107 * WRAP_RETRY already cleaned it up and we are actually done. In a real
108 * failure, the filestore itself would have ended up calling this with
109 * the new path, not the old one, so we'd find it.
110 */
111 r = 0;
112 }
113 goto out;
114 }
115 r = _created(path_comp, oid, short_name);
116 if (r < 0)
117 goto out;
118 );
119}
120
121int LFNIndex::unlink(const ghobject_t &oid)
122{
123 WRAP_RETRY(
124 vector<string> path;
125 string short_name;
126 r = _lookup(oid, &path, &short_name, NULL);
127 if (r < 0) {
128 goto out;
129 }
130 r = _remove(path, oid, short_name);
131 if (r < 0) {
132 goto out;
133 }
134 );
135}
136
137int LFNIndex::lookup(const ghobject_t &oid,
138 IndexedPath *out_path,
139 int *hardlink)
140{
141 WRAP_RETRY(
142 vector<string> path;
143 string short_name;
144 r = _lookup(oid, &path, &short_name, hardlink);
145 if (r < 0)
146 goto out;
147 string full_path = get_full_path(path, short_name);
148 *out_path = std::make_shared<Path>(full_path, this);
149 r = 0;
150 );
151}
152
153int LFNIndex::pre_hash_collection(uint32_t pg_num, uint64_t expected_num_objs)
154{
155 return _pre_hash_collection(pg_num, expected_num_objs);
156}
157
158
159int LFNIndex::collection_list_partial(const ghobject_t &start,
160 const ghobject_t &end,
161 int max_count,
162 vector<ghobject_t> *ls,
163 ghobject_t *next)
164{
165 return _collection_list_partial(start, end, max_count, ls, next);
166}
167
168/* Derived class utility methods */
169
170int LFNIndex::fsync_dir(const vector<string> &path)
171{
172 maybe_inject_failure();
91327a77 173 int fd = ::open(get_full_path_subdir(path).c_str(), O_RDONLY|O_CLOEXEC);
7c673cae
FG
174 if (fd < 0)
175 return -errno;
176 FDCloser f(fd);
177 maybe_inject_failure();
178 int r = ::fsync(fd);
179 maybe_inject_failure();
a8e16298
TL
180 if (r < 0) {
181 derr << __func__ << " fsync failed: " << cpp_strerror(errno) << dendl;
182 ceph_abort();
183 }
184 return 0;
7c673cae
FG
185}
186
187int LFNIndex::link_object(const vector<string> &from,
188 const vector<string> &to,
189 const ghobject_t &oid,
190 const string &from_short_name)
191{
192 int r;
193 string from_path = get_full_path(from, from_short_name);
194 string to_path;
195 maybe_inject_failure();
196 r = lfn_get_name(to, oid, 0, &to_path, 0);
197 if (r < 0)
198 return r;
199 maybe_inject_failure();
200 r = ::link(from_path.c_str(), to_path.c_str());
201 maybe_inject_failure();
202 if (r < 0)
203 return -errno;
204 else
205 return 0;
206}
207
208int LFNIndex::remove_objects(const vector<string> &dir,
209 const map<string, ghobject_t> &to_remove,
210 map<string, ghobject_t> *remaining)
211{
212 set<string> clean_chains;
213 for (map<string, ghobject_t>::const_iterator to_clean = to_remove.begin();
214 to_clean != to_remove.end();
215 ++to_clean) {
216 if (!lfn_is_hashed_filename(to_clean->first)) {
217 maybe_inject_failure();
218 int r = ::unlink(get_full_path(dir, to_clean->first).c_str());
219 maybe_inject_failure();
220 if (r < 0)
221 return -errno;
222 continue;
223 }
224 if (clean_chains.count(lfn_get_short_name(to_clean->second, 0)))
225 continue;
226 set<int> holes;
227 map<int, pair<string, ghobject_t> > chain;
228 for (int i = 0; ; ++i) {
229 string short_name = lfn_get_short_name(to_clean->second, i);
230 if (remaining->count(short_name)) {
231 chain[i] = *(remaining->find(short_name));
232 } else if (to_remove.count(short_name)) {
233 holes.insert(i);
234 } else {
235 break;
236 }
237 }
238
239 map<int, pair<string, ghobject_t > >::reverse_iterator candidate = chain.rbegin();
240 for (set<int>::iterator i = holes.begin();
241 i != holes.end();
242 ++i) {
243 if (candidate == chain.rend() || *i > candidate->first) {
244 string remove_path_name =
245 get_full_path(dir, lfn_get_short_name(to_clean->second, *i));
246 maybe_inject_failure();
247 int r = ::unlink(remove_path_name.c_str());
248 maybe_inject_failure();
249 if (r < 0)
250 return -errno;
251 continue;
252 }
253 string from = get_full_path(dir, candidate->second.first);
254 string to = get_full_path(dir, lfn_get_short_name(candidate->second.second, *i));
255 maybe_inject_failure();
256 int r = ::rename(from.c_str(), to.c_str());
257 maybe_inject_failure();
258 if (r < 0)
259 return -errno;
260 remaining->erase(candidate->second.first);
261 remaining->insert(pair<string, ghobject_t>(
262 lfn_get_short_name(candidate->second.second, *i),
263 candidate->second.second));
264 ++candidate;
265 }
266 if (!holes.empty())
267 clean_chains.insert(lfn_get_short_name(to_clean->second, 0));
268 }
269 return 0;
270}
271
272int LFNIndex::move_objects(const vector<string> &from,
273 const vector<string> &to)
274{
275 map<string, ghobject_t> to_move;
276 int r;
277 r = list_objects(from, 0, NULL, &to_move);
278 if (r < 0)
279 return r;
280 for (map<string,ghobject_t>::iterator i = to_move.begin();
281 i != to_move.end();
282 ++i) {
283 string from_path = get_full_path(from, i->first);
284 string to_path, to_name;
285 r = lfn_get_name(to, i->second, &to_name, &to_path, 0);
286 if (r < 0)
287 return r;
288 maybe_inject_failure();
289 r = ::link(from_path.c_str(), to_path.c_str());
290 if (r < 0 && errno != EEXIST)
291 return -errno;
292 maybe_inject_failure();
293 r = lfn_created(to, i->second, to_name);
294 maybe_inject_failure();
295 if (r < 0)
296 return r;
297 }
298 r = fsync_dir(to);
299 if (r < 0)
300 return r;
301 for (map<string,ghobject_t>::iterator i = to_move.begin();
302 i != to_move.end();
303 ++i) {
304 maybe_inject_failure();
305 r = ::unlink(get_full_path(from, i->first).c_str());
306 maybe_inject_failure();
307 if (r < 0)
308 return -errno;
309 }
310 return fsync_dir(from);
311}
312
313int LFNIndex::remove_object(const vector<string> &from,
314 const ghobject_t &oid)
315{
316 string short_name;
317 int r, exist;
318 maybe_inject_failure();
319 r = get_mangled_name(from, oid, &short_name, &exist);
320 maybe_inject_failure();
321 if (r < 0)
322 return r;
323 if (exist == 0)
324 return -ENOENT;
325 return lfn_unlink(from, oid, short_name);
326}
327
328int LFNIndex::get_mangled_name(const vector<string> &from,
329 const ghobject_t &oid,
330 string *mangled_name, int *hardlink)
331{
332 return lfn_get_name(from, oid, mangled_name, 0, hardlink);
333}
334
335int LFNIndex::move_subdir(
336 LFNIndex &from,
337 LFNIndex &dest,
338 const vector<string> &path,
339 string dir
340 )
341{
342 vector<string> sub_path(path.begin(), path.end());
343 sub_path.push_back(dir);
344 string from_path(from.get_full_path_subdir(sub_path));
345 string to_path(dest.get_full_path_subdir(sub_path));
346 int r = ::rename(from_path.c_str(), to_path.c_str());
347 if (r < 0)
348 return -errno;
349 return 0;
350}
351
352int LFNIndex::move_object(
353 LFNIndex &from,
354 LFNIndex &dest,
355 const vector<string> &path,
356 const pair<string, ghobject_t> &obj
357 )
358{
359 string from_path(from.get_full_path(path, obj.first));
360 string to_path;
361 string to_name;
362 int exists;
363 int r = dest.lfn_get_name(path, obj.second, &to_name, &to_path, &exists);
364 if (r < 0)
365 return r;
366 if (!exists) {
367 r = ::link(from_path.c_str(), to_path.c_str());
368 if (r < 0)
369 return r;
370 }
371 r = dest.lfn_created(path, obj.second, to_name);
372 if (r < 0)
373 return r;
374 r = dest.fsync_dir(path);
375 if (r < 0)
376 return r;
377 r = from.remove_object(path, obj.second);
378 if (r < 0)
379 return r;
380 return from.fsync_dir(path);
381}
382
383
384static int get_hobject_from_oinfo(const char *dir, const char *file,
385 ghobject_t *o)
386{
387 char path[PATH_MAX];
388 snprintf(path, sizeof(path), "%s/%s", dir, file);
389 // Hack, user.ceph._ is the attribute used to store the object info
390 bufferptr bp;
391 int r = chain_getxattr_buf(
392 path,
393 "user.ceph._",
394 &bp);
395 if (r < 0)
396 return r;
397 bufferlist bl;
398 if (r > 0)
399 bl.push_back(bp);
400 object_info_t oi(bl);
401 *o = ghobject_t(oi.soid);
402 return 0;
403}
404
405
406int LFNIndex::list_objects(const vector<string> &to_list, int max_objs,
407 long *handle, map<string, ghobject_t> *out)
408{
409 string to_list_path = get_full_path_subdir(to_list);
410 DIR *dir = ::opendir(to_list_path.c_str());
411 if (!dir) {
412 return -errno;
413 }
414
415 if (handle && *handle) {
416 seekdir(dir, *handle);
417 }
418
419 struct dirent *de = nullptr;
420 int r = 0;
421 int listed = 0;
422 bool end = true;
423 while ((de = ::readdir(dir))) {
424 end = false;
425 if (max_objs > 0 && listed >= max_objs) {
426 break;
427 }
428 if (de->d_name[0] == '.')
429 continue;
430 string short_name(de->d_name);
431 ghobject_t obj;
432 if (lfn_is_object(short_name)) {
433 r = lfn_translate(to_list, short_name, &obj);
434 if (r == -EINVAL) {
435 continue;
436 } else if (r < 0) {
437 goto cleanup;
438 } else {
439 string long_name = lfn_generate_object_name(obj);
440 if (!lfn_must_hash(long_name)) {
441 assert(long_name == short_name);
442 }
443 if (index_version == HASH_INDEX_TAG)
444 get_hobject_from_oinfo(to_list_path.c_str(), short_name.c_str(), &obj);
445
446 out->insert(pair<string, ghobject_t>(short_name, obj));
447 ++listed;
448 }
449 }
450 }
451
452 if (handle && !end) {
453 *handle = telldir(dir);
454 }
455
456 r = 0;
457 cleanup:
458 ::closedir(dir);
459 return r;
460}
461
462int LFNIndex::list_subdirs(const vector<string> &to_list,
463 vector<string> *out)
464{
465 string to_list_path = get_full_path_subdir(to_list);
466 DIR *dir = ::opendir(to_list_path.c_str());
467 if (!dir)
468 return -errno;
469
470 struct dirent *de = nullptr;
471 while ((de = ::readdir(dir))) {
472 string short_name(de->d_name);
473 string demangled_name;
474 if (lfn_is_subdir(short_name, &demangled_name)) {
475 out->push_back(demangled_name);
476 }
477 }
478
479 ::closedir(dir);
480 return 0;
481}
482
483int LFNIndex::create_path(const vector<string> &to_create)
484{
485 maybe_inject_failure();
486 int r = ::mkdir(get_full_path_subdir(to_create).c_str(), 0777);
487 maybe_inject_failure();
488 if (r < 0)
489 return -errno;
490 else
491 return 0;
492}
493
494int LFNIndex::remove_path(const vector<string> &to_remove)
495{
496 maybe_inject_failure();
497 int r = ::rmdir(get_full_path_subdir(to_remove).c_str());
498 maybe_inject_failure();
499 if (r < 0)
500 return -errno;
501 else
502 return 0;
503}
504
505int LFNIndex::path_exists(const vector<string> &to_check, int *exists)
506{
507 string full_path = get_full_path_subdir(to_check);
508 struct stat buf;
509 if (::stat(full_path.c_str(), &buf)) {
510 int r = -errno;
511 if (r == -ENOENT) {
512 *exists = 0;
513 return 0;
514 } else {
515 return r;
516 }
517 } else {
518 *exists = 1;
519 return 0;
520 }
521}
522
523int LFNIndex::add_attr_path(const vector<string> &path,
524 const string &attr_name,
525 bufferlist &attr_value)
526{
527 string full_path = get_full_path_subdir(path);
528 maybe_inject_failure();
529 return chain_setxattr<false, true>(
530 full_path.c_str(), mangle_attr_name(attr_name).c_str(),
531 reinterpret_cast<void *>(attr_value.c_str()),
532 attr_value.length());
533}
534
535int LFNIndex::get_attr_path(const vector<string> &path,
536 const string &attr_name,
537 bufferlist &attr_value)
538{
539 string full_path = get_full_path_subdir(path);
540 bufferptr bp;
541 int r = chain_getxattr_buf(
542 full_path.c_str(),
543 mangle_attr_name(attr_name).c_str(),
544 &bp);
545 if (r > 0)
546 attr_value.push_back(bp);
547 return r;
548}
549
550int LFNIndex::remove_attr_path(const vector<string> &path,
551 const string &attr_name)
552{
553 string full_path = get_full_path_subdir(path);
554 string mangled_attr_name = mangle_attr_name(attr_name);
555 maybe_inject_failure();
556 return chain_removexattr(full_path.c_str(), mangled_attr_name.c_str());
557}
558
559string LFNIndex::lfn_generate_object_name_keyless(const ghobject_t &oid)
560{
561 char s[FILENAME_MAX_LEN];
562 char *end = s + sizeof(s);
563 char *t = s;
564
565 assert(oid.generation == ghobject_t::NO_GEN);
566 const char *i = oid.hobj.oid.name.c_str();
567 // Escape subdir prefix
568 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
569 *t++ = '\\';
570 *t++ = 'd';
571 i += 4;
572 }
573 while (*i && t < end) {
574 if (*i == '\\') {
575 *t++ = '\\';
576 *t++ = '\\';
577 } else if (*i == '.' && i == oid.hobj.oid.name.c_str()) { // only escape leading .
578 *t++ = '\\';
579 *t++ = '.';
580 } else if (*i == '/') {
581 *t++ = '\\';
582 *t++ = 's';
583 } else
584 *t++ = *i;
585 i++;
586 }
587
588 if (oid.hobj.snap == CEPH_NOSNAP)
589 t += snprintf(t, end - t, "_head");
590 else if (oid.hobj.snap == CEPH_SNAPDIR)
591 t += snprintf(t, end - t, "_snapdir");
592 else
593 t += snprintf(t, end - t, "_%llx", (long long unsigned)oid.hobj.snap);
594 snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
595
596 return string(s);
597}
598
/**
 * Append the characters in [begin, end) to @p out, replacing
 * '\' by "\\", '/' by "\s", '_' by "\u" and a NUL byte by "\n".
 */
static void append_escaped(string::const_iterator begin,
                           string::const_iterator end,
                           string *out)
{
  for (; begin != end; ++begin) {
    switch (*begin) {
    case '\\':
      out->append("\\\\");
      break;
    case '/':
      out->append("\\s");
      break;
    case '_':
      out->append("\\u");
      break;
    case '\0':
      out->append("\\n");
      break;
    default:
      out->push_back(*begin);
      break;
    }
  }
}
617
618string LFNIndex::lfn_generate_object_name_current(const ghobject_t &oid)
619{
620 string full_name;
621 string::const_iterator i = oid.hobj.oid.name.begin();
622 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
623 full_name.append("\\d");
624 i += 4;
625 } else if (oid.hobj.oid.name[0] == '.') {
626 full_name.append("\\.");
627 ++i;
628 }
629 append_escaped(i, oid.hobj.oid.name.end(), &full_name);
630 full_name.append("_");
631 append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name);
632 full_name.append("_");
633
634 char buf[PATH_MAX];
635 char *t = buf;
636 const char *end = t + sizeof(buf);
637 if (oid.hobj.snap == CEPH_NOSNAP)
638 t += snprintf(t, end - t, "head");
639 else if (oid.hobj.snap == CEPH_SNAPDIR)
640 t += snprintf(t, end - t, "snapdir");
641 else
642 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap);
643 t += snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
644 full_name.append(buf, t);
645 full_name.append("_");
646
647 append_escaped(oid.hobj.nspace.begin(), oid.hobj.nspace.end(), &full_name);
648 full_name.append("_");
649
650 t = buf;
651 if (oid.hobj.pool == -1)
652 t += snprintf(t, end - t, "none");
653 else
654 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.pool);
655 full_name.append(buf, t);
656
657 if (oid.generation != ghobject_t::NO_GEN ||
658 oid.shard_id != shard_id_t::NO_SHARD) {
659 full_name.append("_");
660
661 t = buf;
662 t += snprintf(t, end - buf, "%llx", (long long unsigned)oid.generation);
663 full_name.append(buf, t);
664
665 full_name.append("_");
666
667 t = buf;
668 t += snprintf(t, end - buf, "%x", (int)oid.shard_id);
669 full_name.append(buf, t);
670 }
671
672 return full_name;
673}
674
675string LFNIndex::lfn_generate_object_name_poolless(const ghobject_t &oid)
676{
677 if (index_version == HASH_INDEX_TAG)
678 return lfn_generate_object_name_keyless(oid);
679
680 assert(oid.generation == ghobject_t::NO_GEN);
681 string full_name;
682 string::const_iterator i = oid.hobj.oid.name.begin();
683 if (oid.hobj.oid.name.substr(0, 4) == "DIR_") {
684 full_name.append("\\d");
685 i += 4;
686 } else if (oid.hobj.oid.name[0] == '.') {
687 full_name.append("\\.");
688 ++i;
689 }
690 append_escaped(i, oid.hobj.oid.name.end(), &full_name);
691 full_name.append("_");
692 append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name);
693 full_name.append("_");
694
695 char snap_with_hash[PATH_MAX];
696 char *t = snap_with_hash;
697 char *end = t + sizeof(snap_with_hash);
698 if (oid.hobj.snap == CEPH_NOSNAP)
699 t += snprintf(t, end - t, "head");
700 else if (oid.hobj.snap == CEPH_SNAPDIR)
701 t += snprintf(t, end - t, "snapdir");
702 else
703 t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap);
704 snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.get_hash())*2), oid.hobj.get_hash());
705 full_name += string(snap_with_hash);
706 return full_name;
707}
708
709int LFNIndex::lfn_get_name(const vector<string> &path,
710 const ghobject_t &oid,
711 string *mangled_name, string *out_path,
712 int *hardlink)
713{
714 string full_name = lfn_generate_object_name(oid);
715 int r;
716
717 if (!lfn_must_hash(full_name)) {
718 if (mangled_name)
719 *mangled_name = full_name;
720 if (out_path)
721 *out_path = get_full_path(path, full_name);
722 if (hardlink) {
723 struct stat buf;
724 string full_path = get_full_path(path, full_name);
725 maybe_inject_failure();
726 r = ::stat(full_path.c_str(), &buf);
727 if (r < 0) {
728 if (errno == ENOENT)
729 *hardlink = 0;
730 else
731 return -errno;
732 } else {
733 *hardlink = buf.st_nlink;
734 }
735 }
736 return 0;
737 }
738
739 int i = 0;
740 string candidate;
741 string candidate_path;
742 for ( ; ; ++i) {
743 candidate = lfn_get_short_name(oid, i);
744 candidate_path = get_full_path(path, candidate);
745 bufferptr bp;
746 r = chain_getxattr_buf(
747 candidate_path.c_str(),
748 get_lfn_attr().c_str(),
749 &bp);
750 if (r < 0) {
751 if (errno != ENODATA && errno != ENOENT)
752 return -errno;
753 if (errno == ENODATA) {
754 // Left over from incomplete transaction, it'll be replayed
755 maybe_inject_failure();
756 r = ::unlink(candidate_path.c_str());
757 maybe_inject_failure();
758 if (r < 0)
759 return -errno;
760 }
761 if (mangled_name)
762 *mangled_name = candidate;
763 if (out_path)
764 *out_path = candidate_path;
765 if (hardlink)
766 *hardlink = 0;
767 return 0;
768 }
769 assert(r > 0);
770 string lfn(bp.c_str(), bp.length());
771 if (lfn == full_name) {
772 if (mangled_name)
773 *mangled_name = candidate;
774 if (out_path)
775 *out_path = candidate_path;
776 if (hardlink) {
777 struct stat st;
778 r = ::stat(candidate_path.c_str(), &st);
779 if (r < 0) {
780 if (errno == ENOENT)
781 *hardlink = 0;
782 else
783 return -errno;
784 } else {
785 *hardlink = st.st_nlink;
786 }
787 }
788 return 0;
789 }
790 bp = bufferptr();
791 r = chain_getxattr_buf(
792 candidate_path.c_str(),
793 get_alt_lfn_attr().c_str(),
794 &bp);
795 if (r > 0) {
796 // only consider alt name if nlink > 1
797 struct stat st;
798 int rc = ::stat(candidate_path.c_str(), &st);
799 if (rc < 0)
800 return -errno;
801 if (st.st_nlink <= 1) {
802 // left over from incomplete unlink, remove
803 maybe_inject_failure();
804 dout(20) << __func__ << " found extra alt attr for " << candidate_path
805 << ", long name " << string(bp.c_str(), bp.length()) << dendl;
806 rc = chain_removexattr(candidate_path.c_str(),
807 get_alt_lfn_attr().c_str());
808 maybe_inject_failure();
809 if (rc < 0)
810 return rc;
811 continue;
812 }
813 string lfn(bp.c_str(), bp.length());
814 if (lfn == full_name) {
815 dout(20) << __func__ << " used alt attr for " << full_name << dendl;
816 if (mangled_name)
817 *mangled_name = candidate;
818 if (out_path)
819 *out_path = candidate_path;
820 if (hardlink)
821 *hardlink = st.st_nlink;
822 return 0;
823 }
824 }
825 }
826 ceph_abort(); // Unreachable
827 return 0;
828}
829
830int LFNIndex::lfn_created(const vector<string> &path,
831 const ghobject_t &oid,
832 const string &mangled_name)
833{
834 if (!lfn_is_hashed_filename(mangled_name))
835 return 0;
836 string full_path = get_full_path(path, mangled_name);
837 string full_name = lfn_generate_object_name(oid);
838 maybe_inject_failure();
839
840 // if the main attr exists and is different, move it to the alt attr.
841 bufferptr bp;
842 int r = chain_getxattr_buf(
843 full_path.c_str(),
844 get_lfn_attr().c_str(),
845 &bp);
846 if (r > 0) {
847 string lfn(bp.c_str(), bp.length());
848 if (lfn != full_name) {
849 dout(20) << __func__ << " " << mangled_name
850 << " moving old name to alt attr "
851 << lfn
852 << ", new name is " << full_name << dendl;
853 r = chain_setxattr<false, true>(
854 full_path.c_str(), get_alt_lfn_attr().c_str(),
855 bp.c_str(), bp.length());
856 if (r < 0)
857 return r;
858 }
859 }
860
861 return chain_setxattr<false, true>(
862 full_path.c_str(), get_lfn_attr().c_str(),
863 full_name.c_str(), full_name.size());
864}
865
866int LFNIndex::lfn_unlink(const vector<string> &path,
867 const ghobject_t &oid,
868 const string &mangled_name)
869{
870 if (!lfn_is_hashed_filename(mangled_name)) {
871 string full_path = get_full_path(path, mangled_name);
872 maybe_inject_failure();
873 int r = ::unlink(full_path.c_str());
874 maybe_inject_failure();
875 if (r < 0)
876 return -errno;
877 return 0;
878 }
879
880 int i = 0;
881 for ( ; ; ++i) {
882 string candidate = lfn_get_short_name(oid, i);
883 if (candidate == mangled_name)
884 break;
885 }
886 int removed_index = i;
887 ++i;
888 for ( ; ; ++i) {
889 struct stat buf;
890 string to_check = lfn_get_short_name(oid, i);
891 string to_check_path = get_full_path(path, to_check);
892 int r = ::stat(to_check_path.c_str(), &buf);
893 if (r < 0) {
894 if (errno == ENOENT) {
895 break;
896 } else {
897 return -errno;
898 }
899 }
900 }
901 string full_path = get_full_path(path, mangled_name);
91327a77 902 int fd = ::open(full_path.c_str(), O_RDONLY|O_CLOEXEC);
7c673cae
FG
903 if (fd < 0)
904 return -errno;
905 FDCloser f(fd);
906 if (i == removed_index + 1) {
907 maybe_inject_failure();
908 int r = ::unlink(full_path.c_str());
909 maybe_inject_failure();
910 if (r < 0)
911 return -errno;
912 } else {
913 string& rename_to = full_path;
914 string rename_from = get_full_path(path, lfn_get_short_name(oid, i - 1));
915 maybe_inject_failure();
916 int r = ::rename(rename_from.c_str(), rename_to.c_str());
917 maybe_inject_failure();
918 if (r < 0)
919 return -errno;
920 }
921 struct stat st;
922 int r = ::fstat(fd, &st);
923 if (r == 0 && st.st_nlink > 0) {
924 // remove alt attr
925 dout(20) << __func__ << " removing alt attr from " << full_path << dendl;
926 fsync_dir(path);
927 chain_fremovexattr(fd, get_alt_lfn_attr().c_str());
928 }
929 return r;
930}
931
932int LFNIndex::lfn_translate(const vector<string> &path,
933 const string &short_name,
934 ghobject_t *out)
935{
936 if (!lfn_is_hashed_filename(short_name)) {
937 return lfn_parse_object_name(short_name, out);
938 }
939 string full_path = get_full_path(path, short_name);
940 // First, check alt attr
941 bufferptr bp;
942 int r = chain_getxattr_buf(
943 full_path.c_str(),
944 get_alt_lfn_attr().c_str(),
945 &bp);
946 if (r > 0) {
947 // There is an alt attr, does it match?
948 string lfn(bp.c_str(), bp.length());
949 if (short_name_matches(short_name.c_str(), lfn.c_str())) {
950 return lfn_parse_object_name(lfn, out);
951 }
952 }
953
954 // Get lfn_attr
955 bp = bufferptr();
956 r = chain_getxattr_buf(
957 full_path.c_str(),
958 get_lfn_attr().c_str(),
959 &bp);
960 if (r < 0)
961 return r;
962 if (r == 0)
963 return -EINVAL;
964
965 string long_name(bp.c_str(), bp.length());
966 return lfn_parse_object_name(long_name, out);
967}
968
969bool LFNIndex::lfn_is_object(const string &short_name)
970{
971 return lfn_is_hashed_filename(short_name) || !lfn_is_subdir(short_name, 0);
972}
973
974bool LFNIndex::lfn_is_subdir(const string &name, string *demangled)
975{
976 if (name.substr(0, SUBDIR_PREFIX.size()) == SUBDIR_PREFIX) {
977 if (demangled)
978 *demangled = demangle_path_component(name);
979 return 1;
980 }
981 return 0;
982}
983
984static int parse_object(const char *s, ghobject_t& o)
985{
986 const char *hash = s + strlen(s) - 1;
987 while (*hash != '_' &&
988 hash > s)
989 hash--;
990 const char *bar = hash - 1;
991 while (*bar != '_' &&
992 bar > s)
993 bar--;
994 if (*bar == '_') {
995 char buf[bar-s + 1];
996 char *t = buf;
997 const char *i = s;
998 while (i < bar) {
999 if (*i == '\\') {
1000 i++;
1001 switch (*i) {
1002 case '\\': *t++ = '\\'; break;
1003 case '.': *t++ = '.'; break;
1004 case 's': *t++ = '/'; break;
1005 case 'd': {
1006 *t++ = 'D';
1007 *t++ = 'I';
1008 *t++ = 'R';
1009 *t++ = '_';
1010 break;
1011 }
1012 default: ceph_abort();
1013 }
1014 } else {
1015 *t++ = *i;
1016 }
1017 i++;
1018 }
1019 *t = 0;
1020 o.hobj.oid.name = string(buf, t-buf);
1021 if (strncmp(bar+1, "head", 4) == 0)
1022 o.hobj.snap = CEPH_NOSNAP;
1023 else if (strncmp(bar+1, "snapdir", 7) == 0)
1024 o.hobj.snap = CEPH_SNAPDIR;
1025 else
1026 o.hobj.snap = strtoull(bar+1, NULL, 16);
1027
1028 uint32_t hobject_hash_input;
1029 sscanf(hash, "_%X", &hobject_hash_input);
1030 o.hobj.set_hash(hobject_hash_input);
1031
1032 return 1;
1033 }
1034 return 0;
1035}
1036
1037int LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t *out)
1038{
1039 int r = parse_object(long_name.c_str(), *out);
1040 int64_t pool = -1;
1041 spg_t pg;
1042 if (coll().is_pg_prefix(&pg))
1043 pool = (int64_t)pg.pgid.pool();
1044 out->hobj.pool = pool;
1045 if (!r) return -EINVAL;
1046 string temp = lfn_generate_object_name(*out);
1047 return r ? 0 : -EINVAL;
1048}
1049
/**
 * Append the characters in [begin, end) to @p out, undoing the escaping
 * "\\" -> '\', "\s" -> '/', "\n" -> NUL and "\u" -> '_'.
 *
 * @return true on success; false on an unknown escape sequence or when the
 *         range ends directly after a backslash.  The latter used to read
 *         the element at @p end and then step past it.  Characters appended
 *         before the error remain in @p out.
 */
static bool append_unescaped(string::const_iterator begin,
                             string::const_iterator end,
                             string *out)
{
  for (string::const_iterator i = begin; i != end; ++i) {
    if (*i != '\\') {
      out->push_back(*i);
      continue;
    }
    if (++i == end)
      return false;  // dangling backslash
    switch (*i) {
    case '\\':
      out->push_back('\\');
      break;
    case 's':
      out->push_back('/');
      break;
    case 'n':
      out->push_back('\0');
      break;
    case 'u':
      out->push_back('_');
      break;
    default:
      return false;
    }
  }
  return true;
}
1073
1074int LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
1075 ghobject_t *out)
1076{
1077 string name;
1078 string key;
1079 uint32_t hash;
1080 snapid_t snap;
1081
1082 string::const_iterator current = long_name.begin();
1083 if (*current == '\\') {
1084 ++current;
1085 if (current == long_name.end()) {
1086 return -EINVAL;
1087 } else if (*current == 'd') {
1088 name.append("DIR_");
1089 ++current;
1090 } else if (*current == '.') {
1091 name.append(".");
1092 ++current;
1093 } else {
1094 --current;
1095 }
1096 }
1097
1098 string::const_iterator end = current;
1099 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1100 if (end == long_name.end())
1101 return -EINVAL;
1102 if (!append_unescaped(current, end, &name))
1103 return -EINVAL;
1104
1105 current = ++end;
1106 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1107 if (end == long_name.end())
1108 return -EINVAL;
1109 if (!append_unescaped(current, end, &key))
1110 return -EINVAL;
1111
1112 current = ++end;
1113 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1114 if (end == long_name.end())
1115 return -EINVAL;
1116 string snap_str(current, end);
1117
1118 current = ++end;
1119 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1120 if (end != long_name.end())
1121 return -EINVAL;
1122 string hash_str(current, end);
1123
1124 if (snap_str == "head")
1125 snap = CEPH_NOSNAP;
1126 else if (snap_str == "snapdir")
1127 snap = CEPH_SNAPDIR;
1128 else
1129 snap = strtoull(snap_str.c_str(), NULL, 16);
1130 sscanf(hash_str.c_str(), "%X", &hash);
1131
1132
1133 int64_t pool = -1;
1134 spg_t pg;
1135 if (coll().is_pg_prefix(&pg))
1136 pool = (int64_t)pg.pgid.pool();
1137 (*out) = ghobject_t(hobject_t(name, key, snap, hash, pool, ""));
1138 return 0;
1139}
1140
1141
1142int LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out)
1143{
1144 string name;
1145 string key;
1146 string ns;
1147 uint32_t hash;
1148 snapid_t snap;
1149 uint64_t pool;
1150 gen_t generation = ghobject_t::NO_GEN;
1151 shard_id_t shard_id = shard_id_t::NO_SHARD;
1152
1153 if (index_version == HASH_INDEX_TAG)
1154 return lfn_parse_object_name_keyless(long_name, out);
1155 if (index_version == HASH_INDEX_TAG_2)
1156 return lfn_parse_object_name_poolless(long_name, out);
1157
1158 string::const_iterator current = long_name.begin();
1159 if (*current == '\\') {
1160 ++current;
1161 if (current == long_name.end()) {
1162 return -EINVAL;
1163 } else if (*current == 'd') {
1164 name.append("DIR_");
1165 ++current;
1166 } else if (*current == '.') {
1167 name.append(".");
1168 ++current;
1169 } else {
1170 --current;
1171 }
1172 }
1173
1174 string::const_iterator end = current;
1175 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1176 if (end == long_name.end())
1177 return -EINVAL;
1178 if (!append_unescaped(current, end, &name))
1179 return -EINVAL;
1180
1181 current = ++end;
1182 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1183 if (end == long_name.end())
1184 return -EINVAL;
1185 if (!append_unescaped(current, end, &key))
1186 return -EINVAL;
1187
1188 current = ++end;
1189 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1190 if (end == long_name.end())
1191 return -EINVAL;
1192 string snap_str(current, end);
1193
1194 current = ++end;
1195 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1196 if (end == long_name.end())
1197 return -EINVAL;
1198 string hash_str(current, end);
1199
1200 current = ++end;
1201 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1202 if (end == long_name.end())
1203 return -EINVAL;
1204 if (!append_unescaped(current, end, &ns))
1205 return -EINVAL;
1206
1207 current = ++end;
1208 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1209 string pstring(current, end);
1210
1211 // Optional generation/shard_id
1212 string genstring, shardstring;
1213 if (end != long_name.end()) {
1214 current = ++end;
1215 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1216 if (end == long_name.end())
1217 return -EINVAL;
1218 genstring = string(current, end);
1219
1220 generation = (gen_t)strtoull(genstring.c_str(), NULL, 16);
1221
1222 current = ++end;
1223 for ( ; end != long_name.end() && *end != '_'; ++end) ;
1224 if (end != long_name.end())
1225 return -EINVAL;
1226 shardstring = string(current, end);
1227
1228 shard_id = (shard_id_t)strtoul(shardstring.c_str(), NULL, 16);
1229 }
1230
1231 if (snap_str == "head")
1232 snap = CEPH_NOSNAP;
1233 else if (snap_str == "snapdir")
1234 snap = CEPH_SNAPDIR;
1235 else
1236 snap = strtoull(snap_str.c_str(), NULL, 16);
1237 sscanf(hash_str.c_str(), "%X", &hash);
1238
1239 if (pstring == "none")
1240 pool = (uint64_t)-1;
1241 else
1242 pool = strtoull(pstring.c_str(), NULL, 16);
1243
1244 (*out) = ghobject_t(hobject_t(name, key, snap, hash, (int64_t)pool, ns), generation, shard_id);
1245 return 0;
1246}
1247
1248bool LFNIndex::lfn_is_hashed_filename(const string &name)
1249{
1250 if (name.size() < (unsigned)FILENAME_SHORT_LEN) {
1251 return 0;
1252 }
1253 if (name.substr(name.size() - FILENAME_COOKIE.size(), FILENAME_COOKIE.size())
1254 == FILENAME_COOKIE) {
1255 return 1;
1256 } else {
1257 return 0;
1258 }
1259}
1260
1261bool LFNIndex::lfn_must_hash(const string &long_name)
1262{
1263 return (int)long_name.size() >= FILENAME_SHORT_LEN;
1264}
1265
/**
 * Write the lowercase hex representation of @p len bytes from @p buf
 * into @p str, followed by a terminating NUL.  @p str must have room
 * for 2 * len + 1 characters.  A non-positive @p len yields "".
 */
static inline void buf_to_hex(const unsigned char *buf, int len, char *str)
{
  static const char digits[] = "0123456789abcdef";
  char *out = str;
  for (int n = 0; n < len; ++n) {
    *out++ = digits[buf[n] >> 4];
    *out++ = digits[buf[n] & 0x0f];
  }
  *out = '\0';
}
1274
1275int LFNIndex::hash_filename(const char *filename, char *hash, int buf_len)
1276{
1277 if (buf_len < FILENAME_HASH_LEN + 1)
1278 return -EINVAL;
1279
1280 char buf[FILENAME_LFN_DIGEST_SIZE];
1281 char hex[FILENAME_LFN_DIGEST_SIZE * 2];
1282
1283 SHA1 h;
1284 h.Update((const byte *)filename, strlen(filename));
1285 h.Final((byte *)buf);
1286
1287 buf_to_hex((byte *)buf, (FILENAME_HASH_LEN + 1) / 2, hex);
1288 strncpy(hash, hex, FILENAME_HASH_LEN);
1289 hash[FILENAME_HASH_LEN] = '\0';
1290 return 0;
1291}
1292
1293void LFNIndex::build_filename(const char *old_filename, int i, char *filename, int len)
1294{
1295 char hash[FILENAME_HASH_LEN + 1];
1296
1297 assert(len >= FILENAME_SHORT_LEN + 4);
1298
1299 strncpy(filename, old_filename, FILENAME_PREFIX_LEN);
1300 filename[FILENAME_PREFIX_LEN] = '\0';
1301 if ((int)strlen(filename) < FILENAME_PREFIX_LEN)
1302 return;
1303 if (old_filename[FILENAME_PREFIX_LEN] == '\0')
1304 return;
1305
1306 hash_filename(old_filename, hash, sizeof(hash));
1307 int ofs = FILENAME_PREFIX_LEN;
1308 while (1) {
1309 int suffix_len = sprintf(filename + ofs, "_%s_%d_%s", hash, i, FILENAME_COOKIE.c_str());
1310 if (ofs + suffix_len <= FILENAME_SHORT_LEN || !ofs)
1311 break;
1312 ofs--;
1313 }
1314}
1315
1316bool LFNIndex::short_name_matches(const char *short_name, const char *cand_long_name)
1317{
1318 const char *end = short_name;
1319 while (*end) ++end;
1320 const char *suffix = end;
1321 if (suffix > short_name) --suffix; // last char
1322 while (suffix > short_name && *suffix != '_') --suffix; // back to first _
1323 if (suffix > short_name) --suffix; // one behind that
1324 while (suffix > short_name && *suffix != '_') --suffix; // back to second _
1325
1326 int index = -1;
1327 char buf[FILENAME_SHORT_LEN + 4];
1328 assert((end - suffix) < (int)sizeof(buf));
1329 int r = sscanf(suffix, "_%d_%s", &index, buf);
1330 if (r < 2)
1331 return false;
1332 if (strcmp(buf, FILENAME_COOKIE.c_str()) != 0)
1333 return false;
1334 build_filename(cand_long_name, index, buf, sizeof(buf));
1335 return strcmp(short_name, buf) == 0;
1336}
1337
1338string LFNIndex::lfn_get_short_name(const ghobject_t &oid, int i)
1339{
1340 string long_name = lfn_generate_object_name(oid);
1341 assert(lfn_must_hash(long_name));
1342 char buf[FILENAME_SHORT_LEN + 4];
1343 build_filename(long_name.c_str(), i, buf, sizeof(buf));
1344 return string(buf);
1345}
1346
1347const string &LFNIndex::get_base_path()
1348{
1349 return base_path;
1350}
1351
1352string LFNIndex::get_full_path_subdir(const vector<string> &rel)
1353{
1354 string retval = get_base_path();
1355 for (vector<string>::const_iterator i = rel.begin();
1356 i != rel.end();
1357 ++i) {
1358 retval += "/";
1359 retval += mangle_path_component(*i);
1360 }
1361 return retval;
1362}
1363
1364string LFNIndex::get_full_path(const vector<string> &rel, const string &name)
1365{
1366 return get_full_path_subdir(rel) + "/" + name;
1367}
1368
1369string LFNIndex::mangle_path_component(const string &component)
1370{
1371 return SUBDIR_PREFIX + component;
1372}
1373
1374string LFNIndex::demangle_path_component(const string &component)
1375{
1376 return component.substr(SUBDIR_PREFIX.size(), component.size() - SUBDIR_PREFIX.size());
1377}
1378
1379int LFNIndex::decompose_full_path(const char *in, vector<string> *out,
1380 ghobject_t *oid, string *shortname)
1381{
1382 const char *beginning = in + get_base_path().size();
1383 const char *end = beginning;
1384 while (1) {
1385 end++;
1386 beginning = end++;
1387 for ( ; *end != '\0' && *end != '/'; ++end) ;
1388 if (*end != '\0') {
1389 out->push_back(demangle_path_component(string(beginning, end - beginning)));
1390 continue;
1391 } else {
1392 break;
1393 }
1394 }
1395 *shortname = string(beginning, end - beginning);
1396 if (oid) {
1397 int r = lfn_translate(*out, *shortname, oid);
1398 if (r < 0)
1399 return r;
1400 }
1401 return 0;
1402}
1403
1404string LFNIndex::mangle_attr_name(const string &attr)
1405{
1406 return PHASH_ATTR_PREFIX + attr;
1407}