]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/tools/rpmemd/rpmemd_db.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2016-2020, Intel Corporation */
5 * rpmemd_db.c -- rpmemd database of pool set files
25 #include "rpmemd_db.h"
26 #include "rpmemd_log.h"
29 * struct rpmemd_db -- pool set database structure
38 * declaration of the 'struct list_head' type
40 PMDK_LIST_HEAD(list_head
, rpmemd_db_entry
);
43 * struct rpmemd_db_entry -- entry in the pool set list
45 struct rpmemd_db_entry
{
46 PMDK_LIST_ENTRY(rpmemd_db_entry
) next
;
52 * rpmemd_db_init -- initialize the rpmem database of pool set files
55 rpmemd_db_init(const char *root_dir
, mode_t mode
)
57 if (root_dir
[0] != '/') {
58 RPMEMD_LOG(ERR
, "root directory is not an absolute path"
59 " -- '%s'", root_dir
);
63 struct rpmemd_db
*db
= calloc(1, sizeof(*db
));
65 RPMEMD_LOG(ERR
, "!allocating the rpmem database structure");
69 db
->root_dir
= strdup(root_dir
);
71 RPMEMD_LOG(ERR
, "!allocating the root dir path");
78 util_mutex_init(&db
->lock
);
84 * rpmemd_db_concat -- (internal) concatenate two paths
87 rpmemd_db_concat(const char *path1
, const char *path2
)
89 size_t len1
= strlen(path1
);
90 size_t len2
= strlen(path2
);
91 size_t new_len
= len1
+ len2
+ 2; /* +1 for '/' in snprintf() */
93 if (path1
[0] != '/') {
94 RPMEMD_LOG(ERR
, "the first path is not an absolute one -- '%s'",
99 if (path2
[0] == '/') {
100 RPMEMD_LOG(ERR
, "the second path is not a relative one -- '%s'",
102 /* set to EBADF to distinguish this case from other errors */
107 char *new_str
= malloc(new_len
);
108 if (new_str
== NULL
) {
109 RPMEMD_LOG(ERR
, "!allocating path buffer");
113 int ret
= util_snprintf(new_str
, new_len
, "%s/%s", path1
, path2
);
115 RPMEMD_LOG(ERR
, "!snprintf");
125 * rpmemd_db_get_path -- (internal) get the full path of the pool set file
128 rpmemd_db_get_path(struct rpmemd_db
*db
, const char *pool_desc
)
130 return rpmemd_db_concat(db
->root_dir
, pool_desc
);
134 * rpmemd_db_pool_madvise -- (internal) workaround device dax alignment issue
137 rpmemd_db_pool_madvise(struct pool_set
*set
)
140 * This is a workaround for an issue with using device dax with
141 * libibverbs. The problem is that we use ibv_fork_init(3) which
142 * makes all registered memory being madvised with MADV_DONTFORK
143 * flag. In libpmemobj the remote replication is performed without
144 * pool header (first 4k). In such case the address passed to
145 * madvise(2) is aligned to 4k, but device dax can require different
146 * alignment (default is 2MB). This workaround madvises the entire
147 * memory region before registering it by ibv_reg_mr(3).
149 const struct pool_set_part
*part
= &set
->replica
[0]->part
[0];
150 if (part
->is_dev_dax
) {
151 int ret
= os_madvise(part
->addr
, part
->filesize
,
162 * rpmemd_get_attr -- (internal) get pool attributes from remote pool attributes
165 rpmemd_get_attr(struct pool_attr
*attr
, const struct rpmem_pool_attr
*rattr
)
167 LOG(3, "attr %p, rattr %p", attr
, rattr
);
168 memcpy(attr
->signature
, rattr
->signature
, POOL_HDR_SIG_LEN
);
169 attr
->major
= rattr
->major
;
170 attr
->features
.compat
= rattr
->compat_features
;
171 attr
->features
.incompat
= rattr
->incompat_features
;
172 attr
->features
.ro_compat
= rattr
->ro_compat_features
;
173 memcpy(attr
->poolset_uuid
, rattr
->poolset_uuid
, POOL_HDR_UUID_LEN
);
174 memcpy(attr
->first_part_uuid
, rattr
->uuid
, POOL_HDR_UUID_LEN
);
175 memcpy(attr
->prev_repl_uuid
, rattr
->prev_uuid
, POOL_HDR_UUID_LEN
);
176 memcpy(attr
->next_repl_uuid
, rattr
->next_uuid
, POOL_HDR_UUID_LEN
);
177 memcpy(attr
->arch_flags
, rattr
->user_flags
, POOL_HDR_ARCH_LEN
);
181 * rpmemd_db_pool_create -- create a new pool set
183 struct rpmemd_db_pool
*
184 rpmemd_db_pool_create(struct rpmemd_db
*db
, const char *pool_desc
,
185 size_t pool_size
, const struct rpmem_pool_attr
*rattr
)
187 RPMEMD_ASSERT(db
!= NULL
);
189 util_mutex_lock(&db
->lock
);
191 struct rpmemd_db_pool
*prp
= NULL
;
192 struct pool_set
*set
;
196 prp
= malloc(sizeof(struct rpmemd_db_pool
));
198 RPMEMD_LOG(ERR
, "!allocating pool set db entry");
202 path
= rpmemd_db_get_path(db
, pool_desc
);
207 struct pool_attr attr
;
208 struct pool_attr
*pattr
= NULL
;
210 rpmemd_get_attr(&attr
, rattr
);
214 ret
= util_pool_create_uuids(&set
, path
, 0, RPMEM_MIN_POOL
,
215 RPMEM_MIN_PART
, pattr
, NULL
, REPLICAS_DISABLED
,
218 RPMEMD_LOG(ERR
, "!cannot create pool set -- '%s'", path
);
222 ret
= util_poolset_chmod(set
, db
->mode
);
224 RPMEMD_LOG(ERR
, "!cannot change pool set mode bits to 0%o",
228 if (rpmemd_db_pool_madvise(set
))
229 goto err_poolset_close
;
232 prp
->pool_addr
= set
->replica
[0]->part
[0].addr
;
233 prp
->pool_size
= set
->poolsize
;
237 util_mutex_unlock(&db
->lock
);
242 util_poolset_close(set
, DO_NOT_DELETE_PARTS
);
248 util_mutex_unlock(&db
->lock
);
253 * rpmemd_db_pool_open -- open a pool set
255 struct rpmemd_db_pool
*
256 rpmemd_db_pool_open(struct rpmemd_db
*db
, const char *pool_desc
,
257 size_t pool_size
, struct rpmem_pool_attr
*rattr
)
259 RPMEMD_ASSERT(db
!= NULL
);
260 RPMEMD_ASSERT(rattr
!= NULL
);
262 util_mutex_lock(&db
->lock
);
264 struct rpmemd_db_pool
*prp
= NULL
;
265 struct pool_set
*set
;
269 prp
= malloc(sizeof(struct rpmemd_db_pool
));
271 RPMEMD_LOG(ERR
, "!allocating pool set db entry");
275 path
= rpmemd_db_get_path(db
, pool_desc
);
280 ret
= util_pool_open_remote(&set
, path
, 0, RPMEM_MIN_PART
, rattr
);
282 RPMEMD_LOG(ERR
, "!cannot open pool set -- '%s'", path
);
286 if (rpmemd_db_pool_madvise(set
))
287 goto err_poolset_close
;
290 prp
->pool_addr
= set
->replica
[0]->part
[0].addr
;
291 prp
->pool_size
= set
->poolsize
;
295 util_mutex_unlock(&db
->lock
);
300 util_poolset_close(set
, DO_NOT_DELETE_PARTS
);
306 util_mutex_unlock(&db
->lock
);
311 * rpmemd_db_pool_close -- close a pool set
314 rpmemd_db_pool_close(struct rpmemd_db
*db
, struct rpmemd_db_pool
*prp
)
316 RPMEMD_ASSERT(db
!= NULL
);
318 util_mutex_lock(&db
->lock
);
320 util_poolset_close(prp
->set
, DO_NOT_DELETE_PARTS
);
323 util_mutex_unlock(&db
->lock
);
327 * rpmemd_db_pool_set_attr -- overwrite pool attributes
330 rpmemd_db_pool_set_attr(struct rpmemd_db_pool
*prp
,
331 const struct rpmem_pool_attr
*rattr
)
333 RPMEMD_ASSERT(prp
!= NULL
);
334 RPMEMD_ASSERT(prp
->set
!= NULL
);
335 RPMEMD_ASSERT(prp
->set
->nreplicas
== 1);
337 return util_replica_set_attr(prp
->set
->replica
[0], rattr
);
346 * rm_poolset_cb -- (internal) callback for removing part files
349 rm_poolset_cb(struct part_file
*pf
, void *arg
)
351 struct rm_cb_args
*args
= (struct rm_cb_args
*)arg
;
353 RPMEMD_LOG(ERR
, "removing remote replica not supported");
357 int ret
= util_unlink_flock(pf
->part
->path
);
358 if (!args
->force
&& ret
) {
359 RPMEMD_LOG(ERR
, "!unlink -- '%s'", pf
->part
->path
);
367 * rpmemd_db_pool_remove -- remove a pool set
370 rpmemd_db_pool_remove(struct rpmemd_db
*db
, const char *pool_desc
,
371 int force
, int pool_set
)
373 RPMEMD_ASSERT(db
!= NULL
);
374 RPMEMD_ASSERT(pool_desc
!= NULL
);
376 util_mutex_lock(&db
->lock
);
378 struct rm_cb_args args
;
383 path
= rpmemd_db_get_path(db
, pool_desc
);
389 int ret
= util_poolset_foreach_part(path
, rm_poolset_cb
, &args
);
391 RPMEMD_LOG(ERR
, "!removing '%s' failed", path
);
402 util_mutex_unlock(&db
->lock
);
407 * rpmemd_db_fini -- deinitialize the rpmem database of pool set files
410 rpmemd_db_fini(struct rpmemd_db
*db
)
412 RPMEMD_ASSERT(db
!= NULL
);
414 util_mutex_destroy(&db
->lock
);
420 * rpmemd_db_check_dups_set -- (internal) check for duplicates in the database
423 rpmemd_db_check_dups_set(struct pool_set
*set
, const char *path
)
425 for (unsigned r
= 0; r
< set
->nreplicas
; r
++) {
426 struct pool_replica
*rep
= set
->replica
[r
];
427 for (unsigned p
= 0; p
< rep
->nparts
; p
++) {
428 if (strcmp(path
, rep
->part
[p
].path
) == 0)
436 * rpmemd_db_check_dups -- (internal) check for duplicates in the database
439 rpmemd_db_check_dups(struct list_head
*head
, struct rpmemd_db
*db
,
440 const char *pool_desc
, struct pool_set
*set
)
442 struct rpmemd_db_entry
*edb
;
444 PMDK_LIST_FOREACH(edb
, head
, next
) {
445 for (unsigned r
= 0; r
< edb
->set
->nreplicas
; r
++) {
446 struct pool_replica
*rep
= edb
->set
->replica
[r
];
447 for (unsigned p
= 0; p
< rep
->nparts
; p
++) {
448 if (rpmemd_db_check_dups_set(set
,
449 rep
->part
[p
].path
)) {
450 RPMEMD_LOG(ERR
, "part file '%s' from "
451 "pool set '%s' duplicated in "
467 * rpmemd_db_add -- (internal) add an entry for a given set to the database
469 static struct rpmemd_db_entry
*
470 rpmemd_db_add(struct list_head
*head
, struct rpmemd_db
*db
,
471 const char *pool_desc
, struct pool_set
*set
)
473 struct rpmemd_db_entry
*edb
;
475 edb
= calloc(1, sizeof(*edb
));
477 RPMEMD_LOG(ERR
, "!allocating database entry");
482 edb
->pool_desc
= strdup(pool_desc
);
483 if (!edb
->pool_desc
) {
484 RPMEMD_LOG(ERR
, "!allocating path for database entry");
488 PMDK_LIST_INSERT_HEAD(head
, edb
, next
);
499 * new_paths -- (internal) create two new paths
502 new_paths(const char *dir
, const char *name
, const char *old_desc
,
503 char **path
, char **new_desc
)
505 *path
= rpmemd_db_concat(dir
, name
);
509 if (old_desc
[0] != 0)
510 *new_desc
= rpmemd_db_concat(old_desc
, name
);
512 *new_desc
= strdup(name
);
514 RPMEMD_LOG(ERR
, "!allocating new descriptor");
525 * rpmemd_db_check_dir_r -- (internal) recursively check given directory
529 rpmemd_db_check_dir_r(struct list_head
*head
, struct rpmemd_db
*db
,
530 const char *dir
, char *pool_desc
)
532 char *new_dir
, *new_desc
, *full_path
;
533 struct dirent
*dentry
;
534 struct pool_set
*set
= NULL
;
540 RPMEMD_LOG(ERR
, "cannot open the directory -- %s", dir
);
544 while ((dentry
= readdir(dirp
)) != NULL
) {
545 if (strcmp(dentry
->d_name
, ".") == 0 ||
546 strcmp(dentry
->d_name
, "..") == 0)
549 if (dentry
->d_type
== DT_DIR
) { /* directory */
550 if (new_paths(dir
, dentry
->d_name
, pool_desc
,
551 &new_dir
, &new_desc
))
554 /* call recursively for a new directory */
555 ret
= rpmemd_db_check_dir_r(head
, db
, new_dir
,
565 if (new_paths(dir
, dentry
->d_name
, pool_desc
,
566 &full_path
, &new_desc
)) {
569 if (util_poolset_read(&set
, full_path
)) {
570 RPMEMD_LOG(ERR
, "!error reading pool set file -- %s",
574 if (rpmemd_db_check_dups(head
, db
, new_desc
, set
)) {
575 RPMEMD_LOG(ERR
, "!duplicate found in pool set file"
576 " -- %s", full_path
);
579 if (rpmemd_db_add(head
, db
, new_desc
, set
) == NULL
) {
591 util_poolset_close(set
, DO_NOT_DELETE_PARTS
);
601 * rpmemd_db_check_dir -- check given directory for duplicates
604 rpmemd_db_check_dir(struct rpmemd_db
*db
)
606 RPMEMD_ASSERT(db
!= NULL
);
608 util_mutex_lock(&db
->lock
);
610 struct list_head head
;
611 PMDK_LIST_INIT(&head
);
613 int ret
= rpmemd_db_check_dir_r(&head
, db
, db
->root_dir
, "");
615 while (!PMDK_LIST_EMPTY(&head
)) {
616 struct rpmemd_db_entry
*edb
= PMDK_LIST_FIRST(&head
);
617 PMDK_LIST_REMOVE(edb
, next
);
618 util_poolset_close(edb
->set
, DO_NOT_DELETE_PARTS
);
619 free(edb
->pool_desc
);
623 util_mutex_unlock(&db
->lock
);
629 * rpmemd_db_pool_is_pmem -- true if pool is in PMEM
632 rpmemd_db_pool_is_pmem(struct rpmemd_db_pool
*pool
)
634 return REP(pool
->set
, 0)->is_pmem
;