]>
Commit | Line | Data |
---|---|---|
a4b75251 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause */ |
2 | /* Copyright 2014-2020, Intel Corporation */ | |
3 | /* | |
4 | * Copyright (c) 2016, Microsoft Corporation. All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * | |
10 | * * Redistributions of source code must retain the above copyright | |
11 | * notice, this list of conditions and the following disclaimer. | |
12 | * | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * | |
18 | * * Neither the name of the copyright holder nor the names of its | |
19 | * contributors may be used to endorse or promote products derived | |
20 | * from this software without specific prior written permission. | |
21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
33 | */ | |
34 | ||
35 | /* | |
36 | * set.h -- internal definitions for set module | |
37 | */ | |
38 | ||
39 | #ifndef PMDK_SET_H | |
40 | #define PMDK_SET_H 1 | |
41 | ||
42 | #include <stddef.h> | |
43 | #include <stdint.h> | |
44 | #include <stdio.h> | |
45 | ||
46 | #include <sys/types.h> | |
47 | ||
48 | #include "out.h" | |
49 | #include "vec.h" | |
50 | #include "pool_hdr.h" | |
51 | #include "librpmem.h" | |
52 | ||
53 | #ifdef __cplusplus | |
54 | extern "C" { | |
55 | #endif | |
56 | ||
/*
 * pool sets & replicas
 *
 * Signature strings and their lengths.  Every *_LEN value is the strlen()
 * of the string defined right above it, i.e. the terminating '\0' is not
 * counted, so the two must be kept in sync by hand.
 * NOTE(review): presumably these are the keywords recognized by the pool
 * set file parser -- confirm against util_poolset_parse().
 */
#define POOLSET_HDR_SIG "PMEMPOOLSET"
#define POOLSET_HDR_SIG_LEN 11 /* does NOT include '\0' */

#define POOLSET_REPLICA_SIG "REPLICA"
#define POOLSET_REPLICA_SIG_LEN 7 /* does NOT include '\0' */

#define POOLSET_OPTION_SIG "OPTION"
#define POOLSET_OPTION_SIG_LEN 6 /* does NOT include '\0' */
68 | ||
/*
 * pool set option flags
 *
 * The non-zero values are distinct single bits.
 * NOTE(review): presumably they are OR-ed together in pool_set.options --
 * confirm against the code that sets that field.
 */
enum pool_set_option_flag {
	OPTION_UNKNOWN = 0x0,
	OPTION_SINGLEHDR = 0x1, /* pool headers only in the first part */
	OPTION_NOHDRS = 0x2, /* no pool headers, remote replicas only */
};
75 | ||
/*
 * pool_set_option -- pairs an option name with its flag value
 *
 * NOTE(review): presumably 'name' is the spelling expected after the OPTION
 * keyword in a pool set file -- confirm against the parser.
 */
struct pool_set_option {
	const char *name; /* textual name of the option */
	enum pool_set_option_flag flag; /* flag the name maps to */
};
80 | ||
/*
 * NOTE(review): the users of the two pairs below are not visible in this
 * header; presumably POOL_LOCAL/POOL_REMOTE feed the 'remote' argument of
 * util_pool_create_uuids() and REPLICAS_* the 'can_have_rep' arguments --
 * confirm at the call sites.
 */
#define POOL_LOCAL 0
#define POOL_REMOTE 1

#define REPLICAS_DISABLED 0
#define REPLICAS_ENABLED 1

/*
 * util_pool_open flags
 *
 * Each flag occupies its own bit, so several of them can be combined in the
 * 'unsigned flags' argument.
 */
#define POOL_OPEN_COW 1 /* copy-on-write mode */
#define POOL_OPEN_IGNORE_SDS 2 /* ignore shutdown state */
#define POOL_OPEN_IGNORE_BAD_BLOCKS 4 /* ignore bad blocks */
#define POOL_OPEN_CHECK_BAD_BLOCKS 8 /* check bad blocks */
92 | ||
/*
 * del_parts_mode -- selects which part files get deleted on close
 *
 * Taken by util_poolset_close(), util_replica_close_local() and
 * util_replica_close_remote().
 */
enum del_parts_mode {
	DO_NOT_DELETE_PARTS, /* do not delete part files */
	DELETE_CREATED_PARTS, /* delete only newly created parts files */
	DELETE_ALL_PARTS /* force delete all parts files */
};
98 | ||
/*
 * pool_set_part -- description and runtime state of a single part
 *
 * The first group of fields is filled in by the pool set file parser, the
 * second one while the pool set is being opened or created.
 */
struct pool_set_part {
	/* populated by a pool set file parser */
	const char *path;
	size_t filesize; /* aligned to page size */
	int fd;
	int flags; /* stores flags used when opening the file */
	/* valid only if fd >= 0 */
	int is_dev_dax; /* indicates if the part is on device dax */
	size_t alignment; /* internal alignment (Device DAX only) */
	int created; /* indicates newly created (zeroed) file */

	/* util_poolset_open/create */
	void *remote_hdr; /* allocated header for remote replica */
	void *hdr; /* base address of header */
	size_t hdrsize; /* size of the header mapping */
	int hdr_map_sync; /* header mapped with MAP_SYNC */
	void *addr; /* base address of the mapping */
	size_t size; /* size of the mapping - page aligned */
	int map_sync; /* part has been mapped with MAP_SYNC flag */
	int rdonly; /* is set based on compat features, affects */
	/* the whole poolset */
	uuid_t uuid; /* NOTE(review): presumably this part's own UUID - confirm */
	int has_bad_blocks; /* part file contains bad blocks */
	int sds_dirty_modified; /* sds dirty flag was set */
};
124 | ||
/*
 * pool_set_directory -- a directory entry of a replica
 *
 * Instances are stored in the 'directory' vector of struct pool_replica.
 */
struct pool_set_directory {
	const char *path; /* path of the directory */
	size_t resvsize; /* size of the address space reservation */

};
130 | ||
/*
 * remote_replica -- data describing a replica located on a remote node
 *
 * Referenced from pool_replica.remote and part_file.remote.
 */
struct remote_replica {
	void *rpp; /* RPMEMpool opaque handle */
	char *node_addr; /* address of a remote node */
	/* poolset descriptor is a pool set file name on a remote node */
	char *pool_desc; /* descriptor of a poolset */
};
137 | ||
/*
 * pool_replica -- a single replica of a pool set
 *
 * The parts are kept in the trailing flexible array; the PART*() helpers
 * index it modulo nparts and the HDR*() helpers modulo nhdrs.
 */
struct pool_replica {
	unsigned nparts; /* number of parts; must be non-zero for PART*() */
	unsigned nallocated; /* NOTE(review): presumably slots allocated */
	/* in part[] - confirm */
	unsigned nhdrs; /* should be 0, 1 or nparts */
	size_t repsize; /* total size of all the parts (mappings) */
	size_t resvsize; /* min size of the address space reservation */
	int is_pmem; /* true if all the parts are in PMEM */
	struct remote_replica *remote; /* not NULL if the replica */
	/* is a remote one */
	VEC(, struct pool_set_directory) directory;
	struct pool_set_part part[];
};
150 | ||
/*
 * pool_set -- in-memory representation of a whole pool set
 *
 * The replicas are kept in the trailing flexible array of pointers; the
 * REP*() helpers index it modulo nreplicas.
 */
struct pool_set {
	char *path; /* path of the poolset file */
	unsigned nreplicas; /* number of entries used in replica[] */
	uuid_t uuid;
	int rdonly;
	int zeroed; /* true if all the parts are new files */
	size_t poolsize; /* the smallest replica size */
	int has_bad_blocks; /* pool set contains bad blocks */
	int remote; /* true if contains a remote replica */
	unsigned options; /* enabled pool set options */

	int directory_based;
	size_t resvsize;

	unsigned next_id;
	unsigned next_directory_id;

	int ignore_sds; /* don't use shutdown state */
	struct pool_replica *replica[];
};
171 | ||
/*
 * part_file -- argument handed to the util_poolset_foreach_part*() callbacks
 *
 * Describes either a local part file or a remote replica; per the field
 * comments exactly one of 'part' and 'remote' is expected to be non-NULL.
 */
struct part_file {
	int is_remote; /* NOTE(review): presumably non-zero for remote - confirm */
	/*
	 * Pointer to the part file structure -
	 * - not-NULL only for a local part file
	 */
	struct pool_set_part *part;
	/*
	 * Pointer to the replica structure -
	 * - not-NULL only for a remote replica
	 */
	struct remote_replica *remote;
};
185 | ||
/*
 * pool_attr -- pool attributes
 *
 * Converted to and from struct pool_hdr by util_pool_attr2hdr() and
 * util_pool_hdr2attr(); array sizes come from the POOL_HDR_* constants
 * (NOTE(review): presumably provided by pool_hdr.h).
 */
struct pool_attr {
	char signature[POOL_HDR_SIG_LEN]; /* pool signature */
	uint32_t major; /* format major version number */
	features_t features; /* features flags */
	unsigned char poolset_uuid[POOL_HDR_UUID_LEN]; /* pool uuid */
	unsigned char first_part_uuid[POOL_HDR_UUID_LEN]; /* first part uuid */
	unsigned char prev_repl_uuid[POOL_HDR_UUID_LEN]; /* prev replica uuid */
	unsigned char next_repl_uuid[POOL_HDR_UUID_LEN]; /* next replica uuid */
	unsigned char arch_flags[POOL_HDR_ARCH_LEN]; /* arch flags */
};
196 | ||
197 | /* get index of the (r)th replica */ | |
198 | static inline unsigned | |
199 | REPidx(const struct pool_set *set, unsigned r) | |
200 | { | |
201 | ASSERTne(set->nreplicas, 0); | |
202 | return r % set->nreplicas; | |
203 | } | |
204 | ||
205 | /* get index of the (r + 1)th replica */ | |
206 | static inline unsigned | |
207 | REPNidx(const struct pool_set *set, unsigned r) | |
208 | { | |
209 | ASSERTne(set->nreplicas, 0); | |
210 | return (r + 1) % set->nreplicas; | |
211 | } | |
212 | ||
213 | /* get index of the (r - 1)th replica */ | |
214 | static inline unsigned | |
215 | REPPidx(const struct pool_set *set, unsigned r) | |
216 | { | |
217 | ASSERTne(set->nreplicas, 0); | |
218 | return (set->nreplicas + r - 1) % set->nreplicas; | |
219 | } | |
220 | ||
221 | /* get index of the (r)th part */ | |
222 | static inline unsigned | |
223 | PARTidx(const struct pool_replica *rep, unsigned p) | |
224 | { | |
225 | ASSERTne(rep->nparts, 0); | |
226 | return p % rep->nparts; | |
227 | } | |
228 | ||
229 | /* get index of the (r + 1)th part */ | |
230 | static inline unsigned | |
231 | PARTNidx(const struct pool_replica *rep, unsigned p) | |
232 | { | |
233 | ASSERTne(rep->nparts, 0); | |
234 | return (p + 1) % rep->nparts; | |
235 | } | |
236 | ||
237 | /* get index of the (r - 1)th part */ | |
238 | static inline unsigned | |
239 | PARTPidx(const struct pool_replica *rep, unsigned p) | |
240 | { | |
241 | ASSERTne(rep->nparts, 0); | |
242 | return (rep->nparts + p - 1) % rep->nparts; | |
243 | } | |
244 | ||
245 | /* get index of the (r)th part */ | |
246 | static inline unsigned | |
247 | HDRidx(const struct pool_replica *rep, unsigned p) | |
248 | { | |
249 | ASSERTne(rep->nhdrs, 0); | |
250 | return p % rep->nhdrs; | |
251 | } | |
252 | ||
253 | /* get index of the (r + 1)th part */ | |
254 | static inline unsigned | |
255 | HDRNidx(const struct pool_replica *rep, unsigned p) | |
256 | { | |
257 | ASSERTne(rep->nhdrs, 0); | |
258 | return (p + 1) % rep->nhdrs; | |
259 | } | |
260 | ||
261 | /* get index of the (r - 1)th part */ | |
262 | static inline unsigned | |
263 | HDRPidx(const struct pool_replica *rep, unsigned p) | |
264 | { | |
265 | ASSERTne(rep->nhdrs, 0); | |
266 | return (rep->nhdrs + p - 1) % rep->nhdrs; | |
267 | } | |
268 | ||
269 | /* get (r)th replica */ | |
270 | static inline struct pool_replica * | |
271 | REP(const struct pool_set *set, unsigned r) | |
272 | { | |
273 | return set->replica[REPidx(set, r)]; | |
274 | } | |
275 | ||
276 | /* get (r + 1)th replica */ | |
277 | static inline struct pool_replica * | |
278 | REPN(const struct pool_set *set, unsigned r) | |
279 | { | |
280 | return set->replica[REPNidx(set, r)]; | |
281 | } | |
282 | ||
283 | /* get (r - 1)th replica */ | |
284 | static inline struct pool_replica * | |
285 | REPP(const struct pool_set *set, unsigned r) | |
286 | { | |
287 | return set->replica[REPPidx(set, r)]; | |
288 | } | |
289 | ||
290 | /* get (p)th part */ | |
291 | static inline struct pool_set_part * | |
292 | PART(struct pool_replica *rep, unsigned p) | |
293 | { | |
294 | return &rep->part[PARTidx(rep, p)]; | |
295 | } | |
296 | ||
297 | /* get (p + 1)th part */ | |
298 | static inline struct pool_set_part * | |
299 | PARTN(struct pool_replica *rep, unsigned p) | |
300 | { | |
301 | return &rep->part[PARTNidx(rep, p)]; | |
302 | } | |
303 | ||
304 | /* get (p - 1)th part */ | |
305 | static inline struct pool_set_part * | |
306 | PARTP(struct pool_replica *rep, unsigned p) | |
307 | { | |
308 | return &rep->part[PARTPidx(rep, p)]; | |
309 | } | |
310 | ||
311 | /* get (p)th header */ | |
312 | static inline struct pool_hdr * | |
313 | HDR(struct pool_replica *rep, unsigned p) | |
314 | { | |
315 | return (struct pool_hdr *)(rep->part[HDRidx(rep, p)].hdr); | |
316 | } | |
317 | ||
318 | /* get (p + 1)th header */ | |
319 | static inline struct pool_hdr * | |
320 | HDRN(struct pool_replica *rep, unsigned p) | |
321 | { | |
322 | return (struct pool_hdr *)(rep->part[HDRNidx(rep, p)].hdr); | |
323 | } | |
324 | ||
325 | /* get (p - 1)th header */ | |
326 | static inline struct pool_hdr * | |
327 | HDRP(struct pool_replica *rep, unsigned p) | |
328 | { | |
329 | return (struct pool_hdr *)(rep->part[HDRPidx(rep, p)].hdr); | |
330 | } | |
331 | ||
/*
 * Global switches; only declared here, the definitions live elsewhere.
 * NOTE(review): judging by the names they control prefaulting at open and
 * create time, shutdown state (SDS) and fallocate use at create time, and
 * copy-on-write opens -- confirm against the definitions.
 */
extern int Prefault_at_open;
extern int Prefault_at_create;
extern int SDS_at_create;
extern int Fallocate_at_create;
extern int COW_at_open;
337 | ||
/*
 * Pool set handling: parsing/reading a pool set file into a struct pool_set
 * (returned through 'setp'), opening, closing and freeing it, and iterating
 * over its parts with a callback that receives a struct part_file and the
 * caller-supplied 'arg'.
 * NOTE(review): the implementations are not visible here; return-value
 * conventions must be confirmed against the definitions.
 */
int util_poolset_parse(struct pool_set **setp, const char *path, int fd);
int util_poolset_read(struct pool_set **setp, const char *path);
int util_poolset_create_set(struct pool_set **setp, const char *path,
	size_t poolsize, size_t minsize, int ignore_sds);
int util_poolset_open(struct pool_set *set);
void util_poolset_close(struct pool_set *set, enum del_parts_mode del);
void util_poolset_free(struct pool_set *set);
int util_poolset_chmod(struct pool_set *set, mode_t mode);
void util_poolset_fdclose(struct pool_set *set);
void util_poolset_fdclose_always(struct pool_set *set);
int util_is_poolset_file(const char *path);
int util_poolset_foreach_part_struct(struct pool_set *set,
	int (*cb)(struct part_file *pf, void *arg), void *arg);
int util_poolset_foreach_part(const char *path,
	int (*cb)(struct part_file *pf, void *arg), void *arg);
size_t util_poolset_size(const char *path);
354 | ||
/*
 * Deep persist/drain of the range [addr, addr + len) within the replica
 * 'replica_id' of 'set'.
 * NOTE(review): presumably the _persist and _drain variants are wrappers
 * around _common differing only in the 'flush' argument -- confirm.
 */
int util_replica_deep_common(const void *addr, size_t len,
	struct pool_set *set, unsigned replica_id, int flush);
int util_replica_deep_persist(const void *addr, size_t len,
	struct pool_set *set, unsigned replica_id);
int util_replica_deep_drain(const void *addr, size_t len,
	struct pool_set *set, unsigned replica_id);
361 | ||
/*
 * Pool creation.  The resulting pool set is returned through 'setp'; 'attr'
 * carries the pool attributes and 'nlanes' is passed by pointer.
 * NOTE(review): presumably 'can_have_rep' takes REPLICAS_DISABLED/ENABLED
 * and 'remote' takes POOL_LOCAL/POOL_REMOTE, and 'nlanes' is in/out --
 * confirm against the definitions.
 */
int util_pool_create(struct pool_set **setp, const char *path, size_t poolsize,
	size_t minsize, size_t minpartsize, const struct pool_attr *attr,
	unsigned *nlanes, int can_have_rep);
int util_pool_create_uuids(struct pool_set **setp, const char *path,
	size_t poolsize, size_t minsize, size_t minpartsize,
	const struct pool_attr *attr, unsigned *nlanes, int can_have_rep,
	int remote);
369 | ||
/*
 * Operations on single parts and replicas: opening/closing part files,
 * opening/closing replicas (addressed by index within 'set'), mapping and
 * unmapping parts, converting between struct pool_hdr and struct pool_attr,
 * and creating a header for part 'partidx' of replica 'repidx'.
 * NOTE(review): the implementations are not visible here; return-value
 * conventions must be confirmed against the definitions.
 */
int util_part_open(struct pool_set_part *part, size_t minsize, int create_part);
void util_part_fdclose(struct pool_set_part *part);
int util_replica_open(struct pool_set *set, unsigned repidx, int flags);
int util_replica_set_attr(struct pool_replica *rep,
	const struct rpmem_pool_attr *rattr);
void util_pool_hdr2attr(struct pool_attr *attr, struct pool_hdr *hdr);
void util_pool_attr2hdr(struct pool_hdr *hdr,
	const struct pool_attr *attr);
int util_replica_close(struct pool_set *set, unsigned repidx);
int util_map_part(struct pool_set_part *part, void *addr, size_t size,
	size_t offset, int flags, int rdonly);
int util_unmap_part(struct pool_set_part *part);
int util_unmap_parts(struct pool_replica *rep, unsigned start_index,
	unsigned end_index);
int util_header_create(struct pool_set *set, unsigned repidx, unsigned partidx,
	const struct pool_attr *attr, int overwrite);
386 | ||
/* mapping and unmapping of a part's header */
int util_map_hdr(struct pool_set_part *part, int flags, int rdonly);
void util_unmap_hdr(struct pool_set_part *part);

/* NOTE(review): presumably non-zero if any part is on Device DAX - confirm */
int util_pool_has_device_dax(struct pool_set *set);

/*
 * Pool opening.  For util_pool_open_nocheck() and util_pool_open() the
 * 'flags' argument takes a combination of the POOL_OPEN_* flags.
 */
int util_pool_open_nocheck(struct pool_set *set, unsigned flags);
int util_pool_open(struct pool_set **setp, const char *path, size_t minpartsize,
	const struct pool_attr *attr, unsigned *nlanes, void *addr,
	unsigned flags);
int util_pool_open_remote(struct pool_set **setp, const char *path, int cow,
	size_t minpartsize, struct rpmem_pool_attr *rattr);

/*
 * NOTE(review): 'size' is passed by pointer, so it is presumably updated
 * with the size actually added; the meaning of the returned pointer must be
 * confirmed against the definition.
 */
void *util_pool_extend(struct pool_set *set, size_t *size, size_t minpartsize);
400 | ||
/*
 * Remote replica support: initialization and teardown of the remote layer,
 * loading/unloading, and opening/closing of remote replicas.
 * NOTE(review): presumably util_remote_load()/util_remote_unload() manage
 * the library behind the Rpmem_* function pointers -- confirm against the
 * definitions.
 */
void util_remote_init(void);
void util_remote_fini(void);

int util_update_remote_header(struct pool_set *set, unsigned repn);
void util_remote_init_lock(void);
void util_remote_destroy_lock(void);
int util_pool_close_remote(RPMEMpool *rpp);
void util_remote_unload(void);
void util_replica_fdclose(struct pool_replica *rep);
int util_poolset_remote_open(struct pool_replica *rep, unsigned repidx,
	size_t minsize, int create, void *pool_addr,
	size_t pool_size, unsigned *nlanes);
int util_remote_load(void);
int util_replica_open_remote(struct pool_set *set, unsigned repidx, int flags);
int util_poolset_remote_replica_open(struct pool_set *set, unsigned repidx,
	size_t minsize, int create, unsigned *nlanes);
int util_replica_close_local(struct pool_replica *rep, unsigned repn,
	enum del_parts_mode del);
int util_replica_close_remote(struct pool_replica *rep, unsigned repn,
	enum del_parts_mode del);
421 | ||
/*
 * Function pointers operating on an RPMEMpool handle (type declared via
 * librpmem.h).  They are only declared here.
 * NOTE(review): presumably each one is bound at run time to the librpmem
 * function of the same lower-case name -- confirm against the definitions;
 * callers should not assume they are non-NULL before that happens.
 */
extern int (*Rpmem_persist)(RPMEMpool *rpp, size_t offset, size_t length,
	unsigned lane, unsigned flags);
extern int (*Rpmem_deep_persist)(RPMEMpool *rpp, size_t offset, size_t length,
	unsigned lane);
extern int (*Rpmem_read)(RPMEMpool *rpp, void *buff, size_t offset,
	size_t length, unsigned lane);
extern int (*Rpmem_close)(RPMEMpool *rpp);

extern int (*Rpmem_remove)(const char *target,
	const char *pool_set_name, int flags);

extern int (*Rpmem_set_attr)(RPMEMpool *rpp,
	const struct rpmem_pool_attr *rattr);
435 | ||
436 | #ifdef __cplusplus | |
437 | } | |
438 | #endif | |
439 | ||
440 | #endif |