]> git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/tools/rpmemd/rpmemd_db.c
import ceph 16.2.7
[ceph.git] / ceph / src / pmdk / src / tools / rpmemd / rpmemd_db.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2016-2020, Intel Corporation */
3
4 /*
5 * rpmemd_db.c -- rpmemd database of pool set files
6 */
7
8 #include <stdio.h>
9 #include <stdint.h>
10 #include <errno.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <dirent.h>
14 #include <sys/file.h>
15 #include <sys/mman.h>
16
17 #include "queue.h"
18 #include "set.h"
19 #include "os.h"
20 #include "out.h"
21 #include "file.h"
22 #include "sys_util.h"
23
24 #include "librpmem.h"
25 #include "rpmemd_db.h"
26 #include "rpmemd_log.h"
27
28 /*
29 * struct rpmemd_db -- pool set database structure
30 */
31 struct rpmemd_db {
32 os_mutex_t lock;
33 char *root_dir;
34 mode_t mode;
35 };
36
37 /*
38 * declaration of the 'struct list_head' type
39 */
40 PMDK_LIST_HEAD(list_head, rpmemd_db_entry);
41
42 /*
43 * struct rpmemd_db_entry -- entry in the pool set list
44 */
45 struct rpmemd_db_entry {
46 PMDK_LIST_ENTRY(rpmemd_db_entry) next;
47 char *pool_desc;
48 struct pool_set *set;
49 };
50
51 /*
52 * rpmemd_db_init -- initialize the rpmem database of pool set files
53 */
54 struct rpmemd_db *
55 rpmemd_db_init(const char *root_dir, mode_t mode)
56 {
57 if (root_dir[0] != '/') {
58 RPMEMD_LOG(ERR, "root directory is not an absolute path"
59 " -- '%s'", root_dir);
60 errno = EINVAL;
61 return NULL;
62 }
63 struct rpmemd_db *db = calloc(1, sizeof(*db));
64 if (!db) {
65 RPMEMD_LOG(ERR, "!allocating the rpmem database structure");
66 return NULL;
67 }
68
69 db->root_dir = strdup(root_dir);
70 if (!db->root_dir) {
71 RPMEMD_LOG(ERR, "!allocating the root dir path");
72 free(db);
73 return NULL;
74 }
75
76 db->mode = mode;
77
78 util_mutex_init(&db->lock);
79
80 return db;
81 }
82
83 /*
84 * rpmemd_db_concat -- (internal) concatenate two paths
85 */
86 static char *
87 rpmemd_db_concat(const char *path1, const char *path2)
88 {
89 size_t len1 = strlen(path1);
90 size_t len2 = strlen(path2);
91 size_t new_len = len1 + len2 + 2; /* +1 for '/' in snprintf() */
92
93 if (path1[0] != '/') {
94 RPMEMD_LOG(ERR, "the first path is not an absolute one -- '%s'",
95 path1);
96 errno = EINVAL;
97 return NULL;
98 }
99 if (path2[0] == '/') {
100 RPMEMD_LOG(ERR, "the second path is not a relative one -- '%s'",
101 path2);
102 /* set to EBADF to distinguish this case from other errors */
103 errno = EBADF;
104 return NULL;
105 }
106
107 char *new_str = malloc(new_len);
108 if (new_str == NULL) {
109 RPMEMD_LOG(ERR, "!allocating path buffer");
110 return NULL;
111 }
112
113 int ret = util_snprintf(new_str, new_len, "%s/%s", path1, path2);
114 if (ret < 0) {
115 RPMEMD_LOG(ERR, "!snprintf");
116 free(new_str);
117 errno = EINVAL;
118 return NULL;
119 }
120
121 return new_str;
122 }
123
124 /*
125 * rpmemd_db_get_path -- (internal) get the full path of the pool set file
126 */
127 static char *
128 rpmemd_db_get_path(struct rpmemd_db *db, const char *pool_desc)
129 {
130 return rpmemd_db_concat(db->root_dir, pool_desc);
131 }
132
133 /*
134 * rpmemd_db_pool_madvise -- (internal) workaround device dax alignment issue
135 */
136 static int
137 rpmemd_db_pool_madvise(struct pool_set *set)
138 {
139 /*
140 * This is a workaround for an issue with using device dax with
141 * libibverbs. The problem is that we use ibv_fork_init(3) which
142 * makes all registered memory being madvised with MADV_DONTFORK
143 * flag. In libpmemobj the remote replication is performed without
144 * pool header (first 4k). In such case the address passed to
145 * madvise(2) is aligned to 4k, but device dax can require different
146 * alignment (default is 2MB). This workaround madvises the entire
147 * memory region before registering it by ibv_reg_mr(3).
148 */
149 const struct pool_set_part *part = &set->replica[0]->part[0];
150 if (part->is_dev_dax) {
151 int ret = os_madvise(part->addr, part->filesize,
152 MADV_DONTFORK);
153 if (ret) {
154 ERR("!madvise");
155 return -1;
156 }
157 }
158 return 0;
159 }
160
161 /*
162 * rpmemd_get_attr -- (internal) get pool attributes from remote pool attributes
163 */
164 static void
165 rpmemd_get_attr(struct pool_attr *attr, const struct rpmem_pool_attr *rattr)
166 {
167 LOG(3, "attr %p, rattr %p", attr, rattr);
168 memcpy(attr->signature, rattr->signature, POOL_HDR_SIG_LEN);
169 attr->major = rattr->major;
170 attr->features.compat = rattr->compat_features;
171 attr->features.incompat = rattr->incompat_features;
172 attr->features.ro_compat = rattr->ro_compat_features;
173 memcpy(attr->poolset_uuid, rattr->poolset_uuid, POOL_HDR_UUID_LEN);
174 memcpy(attr->first_part_uuid, rattr->uuid, POOL_HDR_UUID_LEN);
175 memcpy(attr->prev_repl_uuid, rattr->prev_uuid, POOL_HDR_UUID_LEN);
176 memcpy(attr->next_repl_uuid, rattr->next_uuid, POOL_HDR_UUID_LEN);
177 memcpy(attr->arch_flags, rattr->user_flags, POOL_HDR_ARCH_LEN);
178 }
179
180 /*
181 * rpmemd_db_pool_create -- create a new pool set
182 */
183 struct rpmemd_db_pool *
184 rpmemd_db_pool_create(struct rpmemd_db *db, const char *pool_desc,
185 size_t pool_size, const struct rpmem_pool_attr *rattr)
186 {
187 RPMEMD_ASSERT(db != NULL);
188
189 util_mutex_lock(&db->lock);
190
191 struct rpmemd_db_pool *prp = NULL;
192 struct pool_set *set;
193 char *path;
194 int ret;
195
196 prp = malloc(sizeof(struct rpmemd_db_pool));
197 if (!prp) {
198 RPMEMD_LOG(ERR, "!allocating pool set db entry");
199 goto err_unlock;
200 }
201
202 path = rpmemd_db_get_path(db, pool_desc);
203 if (!path) {
204 goto err_free_prp;
205 }
206
207 struct pool_attr attr;
208 struct pool_attr *pattr = NULL;
209 if (rattr != NULL) {
210 rpmemd_get_attr(&attr, rattr);
211 pattr = &attr;
212 }
213
214 ret = util_pool_create_uuids(&set, path, 0, RPMEM_MIN_POOL,
215 RPMEM_MIN_PART, pattr, NULL, REPLICAS_DISABLED,
216 POOL_REMOTE);
217 if (ret) {
218 RPMEMD_LOG(ERR, "!cannot create pool set -- '%s'", path);
219 goto err_free_path;
220 }
221
222 ret = util_poolset_chmod(set, db->mode);
223 if (ret) {
224 RPMEMD_LOG(ERR, "!cannot change pool set mode bits to 0%o",
225 db->mode);
226 }
227
228 if (rpmemd_db_pool_madvise(set))
229 goto err_poolset_close;
230
231 /* mark as opened */
232 prp->pool_addr = set->replica[0]->part[0].addr;
233 prp->pool_size = set->poolsize;
234 prp->set = set;
235
236 free(path);
237 util_mutex_unlock(&db->lock);
238
239 return prp;
240
241 err_poolset_close:
242 util_poolset_close(set, DO_NOT_DELETE_PARTS);
243 err_free_path:
244 free(path);
245 err_free_prp:
246 free(prp);
247 err_unlock:
248 util_mutex_unlock(&db->lock);
249 return NULL;
250 }
251
252 /*
253 * rpmemd_db_pool_open -- open a pool set
254 */
255 struct rpmemd_db_pool *
256 rpmemd_db_pool_open(struct rpmemd_db *db, const char *pool_desc,
257 size_t pool_size, struct rpmem_pool_attr *rattr)
258 {
259 RPMEMD_ASSERT(db != NULL);
260 RPMEMD_ASSERT(rattr != NULL);
261
262 util_mutex_lock(&db->lock);
263
264 struct rpmemd_db_pool *prp = NULL;
265 struct pool_set *set;
266 char *path;
267 int ret;
268
269 prp = malloc(sizeof(struct rpmemd_db_pool));
270 if (!prp) {
271 RPMEMD_LOG(ERR, "!allocating pool set db entry");
272 goto err_unlock;
273 }
274
275 path = rpmemd_db_get_path(db, pool_desc);
276 if (!path) {
277 goto err_free_prp;
278 }
279
280 ret = util_pool_open_remote(&set, path, 0, RPMEM_MIN_PART, rattr);
281 if (ret) {
282 RPMEMD_LOG(ERR, "!cannot open pool set -- '%s'", path);
283 goto err_free_path;
284 }
285
286 if (rpmemd_db_pool_madvise(set))
287 goto err_poolset_close;
288
289 /* mark as opened */
290 prp->pool_addr = set->replica[0]->part[0].addr;
291 prp->pool_size = set->poolsize;
292 prp->set = set;
293
294 free(path);
295 util_mutex_unlock(&db->lock);
296
297 return prp;
298
299 err_poolset_close:
300 util_poolset_close(set, DO_NOT_DELETE_PARTS);
301 err_free_path:
302 free(path);
303 err_free_prp:
304 free(prp);
305 err_unlock:
306 util_mutex_unlock(&db->lock);
307 return NULL;
308 }
309
310 /*
311 * rpmemd_db_pool_close -- close a pool set
312 */
313 void
314 rpmemd_db_pool_close(struct rpmemd_db *db, struct rpmemd_db_pool *prp)
315 {
316 RPMEMD_ASSERT(db != NULL);
317
318 util_mutex_lock(&db->lock);
319
320 util_poolset_close(prp->set, DO_NOT_DELETE_PARTS);
321 free(prp);
322
323 util_mutex_unlock(&db->lock);
324 }
325
326 /*
327 * rpmemd_db_pool_set_attr -- overwrite pool attributes
328 */
329 int
330 rpmemd_db_pool_set_attr(struct rpmemd_db_pool *prp,
331 const struct rpmem_pool_attr *rattr)
332 {
333 RPMEMD_ASSERT(prp != NULL);
334 RPMEMD_ASSERT(prp->set != NULL);
335 RPMEMD_ASSERT(prp->set->nreplicas == 1);
336
337 return util_replica_set_attr(prp->set->replica[0], rattr);
338 }
339
/*
 * struct rm_cb_args -- arguments shared with the part removal callback
 */
struct rm_cb_args {
	/* when non-zero, failures to remove a part file are not reported */
	int force;
	/* result of the most recent reported unlink failure, 0 if none */
	int ret;
};
344
345 /*
346 * rm_poolset_cb -- (internal) callback for removing part files
347 */
348 static int
349 rm_poolset_cb(struct part_file *pf, void *arg)
350 {
351 struct rm_cb_args *args = (struct rm_cb_args *)arg;
352 if (pf->is_remote) {
353 RPMEMD_LOG(ERR, "removing remote replica not supported");
354 return -1;
355 }
356
357 int ret = util_unlink_flock(pf->part->path);
358 if (!args->force && ret) {
359 RPMEMD_LOG(ERR, "!unlink -- '%s'", pf->part->path);
360 args->ret = ret;
361 }
362
363 return 0;
364 }
365
366 /*
367 * rpmemd_db_pool_remove -- remove a pool set
368 */
369 int
370 rpmemd_db_pool_remove(struct rpmemd_db *db, const char *pool_desc,
371 int force, int pool_set)
372 {
373 RPMEMD_ASSERT(db != NULL);
374 RPMEMD_ASSERT(pool_desc != NULL);
375
376 util_mutex_lock(&db->lock);
377
378 struct rm_cb_args args;
379 args.force = force;
380 args.ret = 0;
381 char *path;
382
383 path = rpmemd_db_get_path(db, pool_desc);
384 if (!path) {
385 args.ret = -1;
386 goto err_unlock;
387 }
388
389 int ret = util_poolset_foreach_part(path, rm_poolset_cb, &args);
390 if (!force && ret) {
391 RPMEMD_LOG(ERR, "!removing '%s' failed", path);
392 args.ret = ret;
393 goto err_free_path;
394 }
395
396 if (pool_set)
397 os_unlink(path);
398
399 err_free_path:
400 free(path);
401 err_unlock:
402 util_mutex_unlock(&db->lock);
403 return args.ret;
404 }
405
406 /*
407 * rpmemd_db_fini -- deinitialize the rpmem database of pool set files
408 */
409 void
410 rpmemd_db_fini(struct rpmemd_db *db)
411 {
412 RPMEMD_ASSERT(db != NULL);
413
414 util_mutex_destroy(&db->lock);
415 free(db->root_dir);
416 free(db);
417 }
418
419 /*
420 * rpmemd_db_check_dups_set -- (internal) check for duplicates in the database
421 */
422 static inline int
423 rpmemd_db_check_dups_set(struct pool_set *set, const char *path)
424 {
425 for (unsigned r = 0; r < set->nreplicas; r++) {
426 struct pool_replica *rep = set->replica[r];
427 for (unsigned p = 0; p < rep->nparts; p++) {
428 if (strcmp(path, rep->part[p].path) == 0)
429 return -1;
430 }
431 }
432 return 0;
433 }
434
435 /*
436 * rpmemd_db_check_dups -- (internal) check for duplicates in the database
437 */
438 static int
439 rpmemd_db_check_dups(struct list_head *head, struct rpmemd_db *db,
440 const char *pool_desc, struct pool_set *set)
441 {
442 struct rpmemd_db_entry *edb;
443
444 PMDK_LIST_FOREACH(edb, head, next) {
445 for (unsigned r = 0; r < edb->set->nreplicas; r++) {
446 struct pool_replica *rep = edb->set->replica[r];
447 for (unsigned p = 0; p < rep->nparts; p++) {
448 if (rpmemd_db_check_dups_set(set,
449 rep->part[p].path)) {
450 RPMEMD_LOG(ERR, "part file '%s' from "
451 "pool set '%s' duplicated in "
452 "pool set '%s'",
453 rep->part[p].path,
454 pool_desc,
455 edb->pool_desc);
456 errno = EEXIST;
457 return -1;
458 }
459
460 }
461 }
462 }
463 return 0;
464 }
465
466 /*
467 * rpmemd_db_add -- (internal) add an entry for a given set to the database
468 */
469 static struct rpmemd_db_entry *
470 rpmemd_db_add(struct list_head *head, struct rpmemd_db *db,
471 const char *pool_desc, struct pool_set *set)
472 {
473 struct rpmemd_db_entry *edb;
474
475 edb = calloc(1, sizeof(*edb));
476 if (!edb) {
477 RPMEMD_LOG(ERR, "!allocating database entry");
478 goto err_calloc;
479 }
480
481 edb->set = set;
482 edb->pool_desc = strdup(pool_desc);
483 if (!edb->pool_desc) {
484 RPMEMD_LOG(ERR, "!allocating path for database entry");
485 goto err_strdup;
486 }
487
488 PMDK_LIST_INSERT_HEAD(head, edb, next);
489
490 return edb;
491
492 err_strdup:
493 free(edb);
494 err_calloc:
495 return NULL;
496 }
497
498 /*
499 * new_paths -- (internal) create two new paths
500 */
501 static int
502 new_paths(const char *dir, const char *name, const char *old_desc,
503 char **path, char **new_desc)
504 {
505 *path = rpmemd_db_concat(dir, name);
506 if (!(*path))
507 return -1;
508
509 if (old_desc[0] != 0)
510 *new_desc = rpmemd_db_concat(old_desc, name);
511 else {
512 *new_desc = strdup(name);
513 if (!(*new_desc)) {
514 RPMEMD_LOG(ERR, "!allocating new descriptor");
515 }
516 }
517 if (!(*new_desc)) {
518 free(*path);
519 return -1;
520 }
521 return 0;
522 }
523
524 /*
525 * rpmemd_db_check_dir_r -- (internal) recursively check given directory
526 * for duplicates
527 */
528 static int
529 rpmemd_db_check_dir_r(struct list_head *head, struct rpmemd_db *db,
530 const char *dir, char *pool_desc)
531 {
532 char *new_dir, *new_desc, *full_path;
533 struct dirent *dentry;
534 struct pool_set *set = NULL;
535 DIR *dirp;
536 int ret = 0;
537
538 dirp = opendir(dir);
539 if (dirp == NULL) {
540 RPMEMD_LOG(ERR, "cannot open the directory -- %s", dir);
541 return -1;
542 }
543
544 while ((dentry = readdir(dirp)) != NULL) {
545 if (strcmp(dentry->d_name, ".") == 0 ||
546 strcmp(dentry->d_name, "..") == 0)
547 continue;
548
549 if (dentry->d_type == DT_DIR) { /* directory */
550 if (new_paths(dir, dentry->d_name, pool_desc,
551 &new_dir, &new_desc))
552 goto err_closedir;
553
554 /* call recursively for a new directory */
555 ret = rpmemd_db_check_dir_r(head, db, new_dir,
556 new_desc);
557 free(new_dir);
558 free(new_desc);
559 if (ret)
560 goto err_closedir;
561 continue;
562
563 }
564
565 if (new_paths(dir, dentry->d_name, pool_desc,
566 &full_path, &new_desc)) {
567 goto err_closedir;
568 }
569 if (util_poolset_read(&set, full_path)) {
570 RPMEMD_LOG(ERR, "!error reading pool set file -- %s",
571 full_path);
572 goto err_free_paths;
573 }
574 if (rpmemd_db_check_dups(head, db, new_desc, set)) {
575 RPMEMD_LOG(ERR, "!duplicate found in pool set file"
576 " -- %s", full_path);
577 goto err_free_set;
578 }
579 if (rpmemd_db_add(head, db, new_desc, set) == NULL) {
580 goto err_free_set;
581 }
582
583 free(new_desc);
584 free(full_path);
585 }
586
587 closedir(dirp);
588 return 0;
589
590 err_free_set:
591 util_poolset_close(set, DO_NOT_DELETE_PARTS);
592 err_free_paths:
593 free(new_desc);
594 free(full_path);
595 err_closedir:
596 closedir(dirp);
597 return -1;
598 }
599
600 /*
601 * rpmemd_db_check_dir -- check given directory for duplicates
602 */
603 int
604 rpmemd_db_check_dir(struct rpmemd_db *db)
605 {
606 RPMEMD_ASSERT(db != NULL);
607
608 util_mutex_lock(&db->lock);
609
610 struct list_head head;
611 PMDK_LIST_INIT(&head);
612
613 int ret = rpmemd_db_check_dir_r(&head, db, db->root_dir, "");
614
615 while (!PMDK_LIST_EMPTY(&head)) {
616 struct rpmemd_db_entry *edb = PMDK_LIST_FIRST(&head);
617 PMDK_LIST_REMOVE(edb, next);
618 util_poolset_close(edb->set, DO_NOT_DELETE_PARTS);
619 free(edb->pool_desc);
620 free(edb);
621 }
622
623 util_mutex_unlock(&db->lock);
624
625 return ret;
626 }
627
628 /*
629 * rpmemd_db_pool_is_pmem -- true if pool is in PMEM
630 */
631 int
632 rpmemd_db_pool_is_pmem(struct rpmemd_db_pool *pool)
633 {
634 return REP(pool->set, 0)->is_pmem;
635 }