]> git.proxmox.com Git - ceph.git/blame - ceph/src/pmdk/src/common/set.h
import ceph 16.2.7
[ceph.git] / ceph / src / pmdk / src / common / set.h
CommitLineData
a4b75251
TL
1/* SPDX-License-Identifier: BSD-3-Clause */
2/* Copyright 2014-2020, Intel Corporation */
3/*
4 * Copyright (c) 2016, Microsoft Corporation. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * * Neither the name of the copyright holder nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * set.h -- internal definitions for set module
37 */
38
39#ifndef PMDK_SET_H
40#define PMDK_SET_H 1
41
42#include <stddef.h>
43#include <stdint.h>
44#include <stdio.h>
45
46#include <sys/types.h>
47
48#include "out.h"
49#include "vec.h"
50#include "pool_hdr.h"
51#include "librpmem.h"
52
53#ifdef __cplusplus
54extern "C" {
55#endif
56
/*
 * pool sets & replicas
 *
 * Keyword strings paired with their lengths.  Every *_LEN value counts
 * only the characters of the matching string, i.e. the terminating '\0'
 * is NOT included.
 */
#define POOLSET_HDR_SIG "PMEMPOOLSET"
#define POOLSET_HDR_SIG_LEN 11 /* characters in POOLSET_HDR_SIG */

#define POOLSET_REPLICA_SIG "REPLICA"
#define POOLSET_REPLICA_SIG_LEN 7 /* characters in POOLSET_REPLICA_SIG */

#define POOLSET_OPTION_SIG "OPTION"
#define POOLSET_OPTION_SIG_LEN 6 /* characters in POOLSET_OPTION_SIG */
68
/*
 * pool_set_option_flag -- flags describing pool set options
 *
 * The non-zero values are distinct bits.
 */
enum pool_set_option_flag {
	/* no (recognized) option */
	OPTION_UNKNOWN = 0x0,
	/* pool headers only in the first part */
	OPTION_SINGLEHDR = 0x1,
	/* no pool headers, remote replicas only */
	OPTION_NOHDRS = 0x2,
};
75
76struct pool_set_option {
77 const char *name;
78 enum pool_set_option_flag flag;
79};
80
/* local vs. remote selector values */
#define POOL_LOCAL 0
#define POOL_REMOTE 1

/* replicas disabled / enabled selector values */
#define REPLICAS_DISABLED 0
#define REPLICAS_ENABLED 1
86
/*
 * util_pool_open flags
 *
 * Each flag is a separate bit (1, 2, 4, 8), so the values can be OR-ed
 * together without colliding.
 */
/* copy-on-write mode */
#define POOL_OPEN_COW 1
/* ignore shutdown state */
#define POOL_OPEN_IGNORE_SDS 2
/* ignore bad blocks */
#define POOL_OPEN_IGNORE_BAD_BLOCKS 4
/* check bad blocks */
#define POOL_OPEN_CHECK_BAD_BLOCKS 8
92
/*
 * del_parts_mode -- selects which part files get deleted
 *
 * Enumerators take the implicit values 0, 1 and 2.
 */
enum del_parts_mode {
	/* do not delete part files */
	DO_NOT_DELETE_PARTS,
	/* delete only newly created parts files */
	DELETE_CREATED_PARTS,
	/* force delete all parts files */
	DELETE_ALL_PARTS
};
98
99struct pool_set_part {
100 /* populated by a pool set file parser */
101 const char *path;
102 size_t filesize; /* aligned to page size */
103 int fd;
104 int flags; /* stores flags used when opening the file */
105 /* valid only if fd >= 0 */
106 int is_dev_dax; /* indicates if the part is on device dax */
107 size_t alignment; /* internal alignment (Device DAX only) */
108 int created; /* indicates newly created (zeroed) file */
109
110 /* util_poolset_open/create */
111 void *remote_hdr; /* allocated header for remote replica */
112 void *hdr; /* base address of header */
113 size_t hdrsize; /* size of the header mapping */
114 int hdr_map_sync; /* header mapped with MAP_SYNC */
115 void *addr; /* base address of the mapping */
116 size_t size; /* size of the mapping - page aligned */
117 int map_sync; /* part has been mapped with MAP_SYNC flag */
118 int rdonly; /* is set based on compat features, affects */
119 /* the whole poolset */
120 uuid_t uuid;
121 int has_bad_blocks; /* part file contains bad blocks */
122 int sds_dirty_modified; /* sds dirty flag was set */
123};
124
/*
 * pool_set_directory -- a directory entry of a pool set
 */
struct pool_set_directory {
	const char *path;	/* directory path */
	size_t resvsize;	/* size of the address space reservation */
};
130
/*
 * remote_replica -- handle and addressing data of a remote replica
 */
struct remote_replica {
	void *rpp;		/* RPMEMpool opaque handle */
	char *node_addr;	/* address of a remote node */
	/*
	 * descriptor of a poolset; the descriptor is a pool set file name
	 * on a remote node
	 */
	char *pool_desc;
};
137
138struct pool_replica {
139 unsigned nparts;
140 unsigned nallocated;
141 unsigned nhdrs; /* should be 0, 1 or nparts */
142 size_t repsize; /* total size of all the parts (mappings) */
143 size_t resvsize; /* min size of the address space reservation */
144 int is_pmem; /* true if all the parts are in PMEM */
145 struct remote_replica *remote; /* not NULL if the replica */
146 /* is a remote one */
147 VEC(, struct pool_set_directory) directory;
148 struct pool_set_part part[];
149};
150
151struct pool_set {
152 char *path; /* path of the poolset file */
153 unsigned nreplicas;
154 uuid_t uuid;
155 int rdonly;
156 int zeroed; /* true if all the parts are new files */
157 size_t poolsize; /* the smallest replica size */
158 int has_bad_blocks; /* pool set contains bad blocks */
159 int remote; /* true if contains a remote replica */
160 unsigned options; /* enabled pool set options */
161
162 int directory_based;
163 size_t resvsize;
164
165 unsigned next_id;
166 unsigned next_directory_id;
167
168 int ignore_sds; /* don't use shutdown state */
169 struct pool_replica *replica[];
170};
171
/*
 * part_file -- refers either to a local part file or to a remote replica
 */
struct part_file {
	int is_remote;

	/* part file structure; not-NULL only for a local part file */
	struct pool_set_part *part;

	/* replica structure; not-NULL only for a remote replica */
	struct remote_replica *remote;
};
185
186struct pool_attr {
187 char signature[POOL_HDR_SIG_LEN]; /* pool signature */
188 uint32_t major; /* format major version number */
189 features_t features; /* features flags */
190 unsigned char poolset_uuid[POOL_HDR_UUID_LEN]; /* pool uuid */
191 unsigned char first_part_uuid[POOL_HDR_UUID_LEN]; /* first part uuid */
192 unsigned char prev_repl_uuid[POOL_HDR_UUID_LEN]; /* prev replica uuid */
193 unsigned char next_repl_uuid[POOL_HDR_UUID_LEN]; /* next replica uuid */
194 unsigned char arch_flags[POOL_HDR_ARCH_LEN]; /* arch flags */
195};
196
197/* get index of the (r)th replica */
198static inline unsigned
199REPidx(const struct pool_set *set, unsigned r)
200{
201 ASSERTne(set->nreplicas, 0);
202 return r % set->nreplicas;
203}
204
205/* get index of the (r + 1)th replica */
206static inline unsigned
207REPNidx(const struct pool_set *set, unsigned r)
208{
209 ASSERTne(set->nreplicas, 0);
210 return (r + 1) % set->nreplicas;
211}
212
213/* get index of the (r - 1)th replica */
214static inline unsigned
215REPPidx(const struct pool_set *set, unsigned r)
216{
217 ASSERTne(set->nreplicas, 0);
218 return (set->nreplicas + r - 1) % set->nreplicas;
219}
220
221/* get index of the (r)th part */
222static inline unsigned
223PARTidx(const struct pool_replica *rep, unsigned p)
224{
225 ASSERTne(rep->nparts, 0);
226 return p % rep->nparts;
227}
228
229/* get index of the (r + 1)th part */
230static inline unsigned
231PARTNidx(const struct pool_replica *rep, unsigned p)
232{
233 ASSERTne(rep->nparts, 0);
234 return (p + 1) % rep->nparts;
235}
236
237/* get index of the (r - 1)th part */
238static inline unsigned
239PARTPidx(const struct pool_replica *rep, unsigned p)
240{
241 ASSERTne(rep->nparts, 0);
242 return (rep->nparts + p - 1) % rep->nparts;
243}
244
245/* get index of the (r)th part */
246static inline unsigned
247HDRidx(const struct pool_replica *rep, unsigned p)
248{
249 ASSERTne(rep->nhdrs, 0);
250 return p % rep->nhdrs;
251}
252
253/* get index of the (r + 1)th part */
254static inline unsigned
255HDRNidx(const struct pool_replica *rep, unsigned p)
256{
257 ASSERTne(rep->nhdrs, 0);
258 return (p + 1) % rep->nhdrs;
259}
260
261/* get index of the (r - 1)th part */
262static inline unsigned
263HDRPidx(const struct pool_replica *rep, unsigned p)
264{
265 ASSERTne(rep->nhdrs, 0);
266 return (rep->nhdrs + p - 1) % rep->nhdrs;
267}
268
269/* get (r)th replica */
270static inline struct pool_replica *
271REP(const struct pool_set *set, unsigned r)
272{
273 return set->replica[REPidx(set, r)];
274}
275
276/* get (r + 1)th replica */
277static inline struct pool_replica *
278REPN(const struct pool_set *set, unsigned r)
279{
280 return set->replica[REPNidx(set, r)];
281}
282
283/* get (r - 1)th replica */
284static inline struct pool_replica *
285REPP(const struct pool_set *set, unsigned r)
286{
287 return set->replica[REPPidx(set, r)];
288}
289
290/* get (p)th part */
291static inline struct pool_set_part *
292PART(struct pool_replica *rep, unsigned p)
293{
294 return &rep->part[PARTidx(rep, p)];
295}
296
297/* get (p + 1)th part */
298static inline struct pool_set_part *
299PARTN(struct pool_replica *rep, unsigned p)
300{
301 return &rep->part[PARTNidx(rep, p)];
302}
303
304/* get (p - 1)th part */
305static inline struct pool_set_part *
306PARTP(struct pool_replica *rep, unsigned p)
307{
308 return &rep->part[PARTPidx(rep, p)];
309}
310
311/* get (p)th header */
312static inline struct pool_hdr *
313HDR(struct pool_replica *rep, unsigned p)
314{
315 return (struct pool_hdr *)(rep->part[HDRidx(rep, p)].hdr);
316}
317
318/* get (p + 1)th header */
319static inline struct pool_hdr *
320HDRN(struct pool_replica *rep, unsigned p)
321{
322 return (struct pool_hdr *)(rep->part[HDRNidx(rep, p)].hdr);
323}
324
325/* get (p - 1)th header */
326static inline struct pool_hdr *
327HDRP(struct pool_replica *rep, unsigned p)
328{
329 return (struct pool_hdr *)(rep->part[HDRPidx(rep, p)].hdr);
330}
331
/*
 * global integer settings -- only declared here, the definitions live
 * outside this header
 */
extern int Prefault_at_open;
extern int Prefault_at_create;
extern int SDS_at_create;
extern int Fallocate_at_create;
extern int COW_at_open;
337
/*
 * pool set level routines (prototypes only; the definitions are not part
 * of this header)
 */
int util_poolset_parse(struct pool_set **setp, const char *path, int fd);
int util_poolset_read(struct pool_set **setp, const char *path);
int util_poolset_create_set(struct pool_set **setp, const char *path,
		size_t poolsize, size_t minsize, int ignore_sds);
int util_poolset_open(struct pool_set *set);
void util_poolset_close(struct pool_set *set, enum del_parts_mode del);
void util_poolset_free(struct pool_set *set);
int util_poolset_chmod(struct pool_set *set, mode_t mode);
void util_poolset_fdclose(struct pool_set *set);
void util_poolset_fdclose_always(struct pool_set *set);
int util_is_poolset_file(const char *path);

/*
 * the two foreach variants take a callback that receives a part_file
 * plus the caller-provided arg pointer
 */
int util_poolset_foreach_part_struct(struct pool_set *set,
		int (*cb)(struct part_file *pf, void *arg), void *arg);
int util_poolset_foreach_part(const char *path,
		int (*cb)(struct part_file *pf, void *arg), void *arg);

size_t util_poolset_size(const char *path);
354
/*
 * replica deep persist/drain routines (prototypes only); each operates
 * on the range [addr, addr + len) of the replica selected by replica_id
 * -- NOTE(review): range semantics inferred from the parameters, confirm
 * against the definitions
 */
int util_replica_deep_common(const void *addr, size_t len,
		struct pool_set *set, unsigned replica_id, int flush);
int util_replica_deep_persist(const void *addr, size_t len,
		struct pool_set *set, unsigned replica_id);
int util_replica_deep_drain(const void *addr, size_t len,
		struct pool_set *set, unsigned replica_id);
361
/*
 * pool creation routines (prototypes only); util_pool_create_uuids takes
 * the same arguments as util_pool_create plus a trailing remote flag
 */
int util_pool_create(struct pool_set **setp, const char *path,
		size_t poolsize, size_t minsize, size_t minpartsize,
		const struct pool_attr *attr, unsigned *nlanes,
		int can_have_rep);
int util_pool_create_uuids(struct pool_set **setp, const char *path,
		size_t poolsize, size_t minsize, size_t minpartsize,
		const struct pool_attr *attr, unsigned *nlanes,
		int can_have_rep, int remote);
369
/*
 * part, replica and header level routines (prototypes only; the
 * definitions are not part of this header)
 */
int util_part_open(struct pool_set_part *part, size_t minsize,
		int create_part);
void util_part_fdclose(struct pool_set_part *part);
int util_replica_open(struct pool_set *set, unsigned repidx, int flags);
int util_replica_set_attr(struct pool_replica *rep,
		const struct rpmem_pool_attr *rattr);

/* conversions between pool_hdr and pool_attr, one per direction */
void util_pool_hdr2attr(struct pool_attr *attr, struct pool_hdr *hdr);
void util_pool_attr2hdr(struct pool_hdr *hdr, const struct pool_attr *attr);

int util_replica_close(struct pool_set *set, unsigned repidx);
int util_map_part(struct pool_set_part *part, void *addr, size_t size,
		size_t offset, int flags, int rdonly);
int util_unmap_part(struct pool_set_part *part);
int util_unmap_parts(struct pool_replica *rep, unsigned start_index,
		unsigned end_index);
int util_header_create(struct pool_set *set, unsigned repidx,
		unsigned partidx, const struct pool_attr *attr, int overwrite);

int util_map_hdr(struct pool_set_part *part, int flags, int rdonly);
void util_unmap_hdr(struct pool_set_part *part);

int util_pool_has_device_dax(struct pool_set *set);
391
/*
 * pool open/extend routines (prototypes only); the unsigned flags
 * arguments presumably take the POOL_OPEN_* values -- confirm against
 * the definitions
 */
int util_pool_open_nocheck(struct pool_set *set, unsigned flags);
int util_pool_open(struct pool_set **setp, const char *path,
		size_t minpartsize, const struct pool_attr *attr,
		unsigned *nlanes, void *addr, unsigned flags);
int util_pool_open_remote(struct pool_set **setp, const char *path,
		int cow, size_t minpartsize, struct rpmem_pool_attr *rattr);

void *util_pool_extend(struct pool_set *set, size_t *size,
		size_t minpartsize);
400
401void util_remote_init(void);
402void util_remote_fini(void);
403
404int util_update_remote_header(struct pool_set *set, unsigned repn);
405void util_remote_init_lock(void);
406void util_remote_destroy_lock(void);
407int util_pool_close_remote(RPMEMpool *rpp);
408void util_remote_unload(void);
409void util_replica_fdclose(struct pool_replica *rep);
410int util_poolset_remote_open(struct pool_replica *rep, unsigned repidx,
411 size_t minsize, int create, void *pool_addr,
412 size_t pool_size, unsigned *nlanes);
413int util_remote_load(void);
414int util_replica_open_remote(struct pool_set *set, unsigned repidx, int flags);
415int util_poolset_remote_replica_open(struct pool_set *set, unsigned repidx,
416 size_t minsize, int create, unsigned *nlanes);
417int util_replica_close_local(struct pool_replica *rep, unsigned repn,
418 enum del_parts_mode del);
419int util_replica_close_remote(struct pool_replica *rep, unsigned repn,
420 enum del_parts_mode del);
421
422extern int (*Rpmem_persist)(RPMEMpool *rpp, size_t offset, size_t length,
423 unsigned lane, unsigned flags);
424extern int (*Rpmem_deep_persist)(RPMEMpool *rpp, size_t offset, size_t length,
425 unsigned lane);
426extern int (*Rpmem_read)(RPMEMpool *rpp, void *buff, size_t offset,
427 size_t length, unsigned lane);
428extern int (*Rpmem_close)(RPMEMpool *rpp);
429
430extern int (*Rpmem_remove)(const char *target,
431 const char *pool_set_name, int flags);
432
433extern int (*Rpmem_set_attr)(RPMEMpool *rpp,
434 const struct rpmem_pool_attr *rattr);
435
436#ifdef __cplusplus
437}
438#endif
439
440#endif