/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright 2016-2020, Intel Corporation */

/*
 * memblock.h -- internal definitions for memory block
 */

#ifndef LIBPMEMOBJ_MEMBLOCK_H
#define LIBPMEMOBJ_MEMBLOCK_H 1

#include <stddef.h>
#include <stdint.h>

#include "os_thread.h"
#include "heap_layout.h"
#include "memops.h"
#include "palloc.h"

#ifdef __cplusplus
extern "C" {
#endif

#define MEMORY_BLOCK_NONE \
(struct memory_block)\
{0, 0, 0, 0, NULL, NULL, MAX_HEADER_TYPES, MAX_MEMORY_BLOCK, NULL}

#define MEMORY_BLOCK_IS_NONE(_m)\
((_m).heap == NULL)

#define MEMORY_BLOCK_EQUALS(lhs, rhs)\
((lhs).zone_id == (rhs).zone_id && (lhs).chunk_id == (rhs).chunk_id &&\
(lhs).block_off == (rhs).block_off && (lhs).heap == (rhs).heap)
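
/*
 * Usage sketch (illustrative only, not part of this header's API): the
 * sentinel value is typically returned on failure and detected by the
 * caller through its NULL heap pointer:
 *
 *	struct memory_block m = MEMORY_BLOCK_NONE;
 *	...
 *	if (MEMORY_BLOCK_IS_NONE(m))
 *		(handle the allocation failure)
 */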

enum memory_block_type {
	/*
	 * Huge memory blocks are directly backed by memory chunks. A single
	 * huge block can consist of several chunks.
	 * The persistent representation of huge memory blocks can be thought
	 * of as a doubly linked list with variable length elements.
	 * That list is stored in the chunk headers array where one element
	 * directly corresponds to one chunk.
	 *
	 * U - used, F - free, R - footer, . - empty
	 * |U| represents a used chunk with a size index of 1, with type
	 * information (CHUNK_TYPE_USED) stored in the corresponding header
	 * array element - chunk_headers[chunk_id].
	 *
	 * |F...R| represents a free chunk with a size index of 5. The empty
	 * chunk headers have undefined values and shouldn't be used. All
	 * chunks with a size index larger than 1 must have a footer in the
	 * last corresponding header array element -
	 * chunk_headers[chunk_id + size_idx - 1].
	 *
	 * The above representation of chunks will be used to describe the
	 * way fail-safety is achieved during heap operations.
	 *
	 * Allocation of a huge memory block with size index 5:
	 * Initial heap state: |U| <> |F..R| <> |U| <> |F......R|
	 *
	 * The only block that matches that size is at the very end of the
	 * chunks list: |F......R|
	 *
	 * As the request was for a memory block with size index 5, and this
	 * one's size index is 8, the chunk first needs to be split in two.
	 * 1) The last chunk header of the new allocation is marked as a
	 *	footer and the block after that one is marked as free:
	 *	|F...RF.R|
	 *	This is allowed and has no impact on the heap because this
	 *	modification is made to a chunk header that is otherwise
	 *	unused; in other words, the linked list didn't change.
	 *
	 * 2) The size index of the first header is changed from the previous
	 *	value of 8 to 5: |F...R||F.R|
	 *	This is a single fail-safe atomic operation and is the
	 *	first change that is noticeable by the heap operations.
	 *	A single linked list element is split into two new ones.
	 *
	 * 3) The allocation process either uses a redo log or directly
	 *	changes the chunk header type from free to used:
	 *	|U...R| <> |F.R|
	 *
	 * In a similar fashion the reverse operation, free, is performed:
	 * Initial heap state: |U| <> |F..R| <> |F| <> |U...R| <> |F.R|
	 *
	 * This is the heap after the previous example, with the single chunk
	 * in between changed from used to free.
	 *
	 * 1) Determine the neighbors of the memory block which is being
	 *	freed.
	 *
	 * 2) Update the footer (if needed) information of the last chunk,
	 *	which is either the memory block being freed or its neighbor
	 *	to the right. |F| <> |U...R| <> |F.R << this one|
	 *
	 * 3) Update the size index and type of the left-most chunk header.
	 *	And so this: |F << this one| <> |U...R| <> |F.R|
	 *	becomes this: |F.......R|
	 *	The entire chunk header can be updated in a single fail-safe
	 *	atomic operation because its size is only 64 bits.
	 */
	MEMORY_BLOCK_HUGE,
	/*
	 * Run memory blocks are chunks with CHUNK_TYPE_RUN and a size index
	 * of 1. The entire chunk is subdivided into smaller blocks and has
	 * additional metadata attached in the form of a bitmap - each bit
	 * corresponds to a single block.
	 * In this case there's no need to perform any coalescing or splitting
	 * of the persistent metadata.
	 * The bitmap is stored in a variable number of 64-bit values and,
	 * because of the requirement of allocation fail-safe atomicity, the
	 * maximum size index of a memory block from a run is 64 - since
	 * that's the limit of the atomic write guarantee.
	 *
	 * The allocation/deallocation process is a single 8-byte write that
	 * sets/clears the corresponding bits. Depending on the user's choice,
	 * it can either be made atomically or by using a redo log when
	 * grouped with other operations.
	 * It's also important to note that in the case of realloc it may
	 * happen that a single 8-byte bitmap value has its bits both set and
	 * cleared - that's why the run memory block metadata changes operate
	 * by AND'ing or OR'ing a bitmask instead of directly setting the
	 * value (see the sketch after this enum).
	 */
	MEMORY_BLOCK_RUN,

	MAX_MEMORY_BLOCK
};
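
/*
 * A minimal sketch of the AND/OR masking described above (illustrative;
 * 'value' stands for a single 8-byte word of the run bitmap):
 *
 *	value |= (1ULL << 3) | (1ULL << 4);	(sets - allocates - blocks)
 *	value &= ~(1ULL << 0);			(clears - frees - block 0)
 *
 * Expressing the change as a bitmask keeps each modification a single
 * 8-byte write, so it can be applied atomically or replayed from a redo
 * log without overwriting unrelated bits of the same word.
 */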

enum memblock_state {
	MEMBLOCK_STATE_UNKNOWN,
	MEMBLOCK_ALLOCATED,
	MEMBLOCK_FREE,

	MAX_MEMBLOCK_STATE,
};

/* runtime bitmap information for a run */
struct run_bitmap {
	unsigned nvalues; /* number of 8 byte values - size of values array */
	unsigned nbits; /* number of valid bits */

	size_t size; /* total size of the bitmap in bytes */

	uint64_t *values; /* pointer to the bitmap's values array */
};
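
/*
 * Illustrative helper (the name is hypothetical, not part of the
 * implementation): a block offset within a run maps onto bit (off % 64)
 * of values[off / 64].
 */
static inline int
run_bitmap_example_is_set(const struct run_bitmap *b, unsigned off)
{
	/* caller must guarantee off < b->nbits */
	return (b->values[off / 64] & (1ULL << (off % 64))) != 0;
}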

/* runtime information necessary to create a run */
struct run_descriptor {
	uint16_t flags; /* chunk flags for the run */
	size_t unit_size; /* the size of a single unit in a run */
	uint32_t size_idx; /* size index of a single run instance */
	size_t alignment; /* required alignment of objects */
	unsigned nallocs; /* number of allocs per run */
	struct run_bitmap bitmap;
};

struct memory_block_ops {
	/* returns memory block size */
	size_t (*block_size)(const struct memory_block *m);

	/* prepares header modification operation */
	void (*prep_hdr)(const struct memory_block *m,
		enum memblock_state dest_state, struct operation_context *ctx);

	/* returns lock associated with memory block */
	os_mutex_t *(*get_lock)(const struct memory_block *m);

	/* returns whether a block is allocated or not */
	enum memblock_state (*get_state)(const struct memory_block *m);

	/* returns pointer to the data of a block */
	void *(*get_user_data)(const struct memory_block *m);

	/*
	 * Returns the size of a memory block without overhead.
	 * This is the size of a data block that can be used.
	 */
	size_t (*get_user_size)(const struct memory_block *m);

	/* returns pointer to the beginning of data of a run block */
	void *(*get_real_data)(const struct memory_block *m);

	/* returns the size of a memory block, including headers */
	size_t (*get_real_size)(const struct memory_block *m);

	/* writes a header of an allocation */
	void (*write_header)(const struct memory_block *m,
		uint64_t extra_field, uint16_t flags);
	void (*invalidate)(const struct memory_block *m);

	/*
	 * Checks that the header type of a chunk matches the expected type
	 * and modifies it if necessary. This is fail-safe atomic.
	 */
	void (*ensure_header_type)(const struct memory_block *m,
		enum header_type t);

	/*
	 * Reinitializes a block after a heap restart.
	 * This is called for EVERY allocation, but *only* under Valgrind.
	 */
	void (*reinit_header)(const struct memory_block *m);

	/* returns the extra field of an allocation */
	uint64_t (*get_extra)(const struct memory_block *m);

	/* returns the flags of an allocation */
	uint16_t (*get_flags)(const struct memory_block *m);

	/* initializes memblock in valgrind */
	void (*vg_init)(const struct memory_block *m, int objects,
		object_callback cb, void *arg);

	/* iterates over every free block */
	int (*iterate_free)(const struct memory_block *m,
		object_callback cb, void *arg);

	/* iterates over every used block */
	int (*iterate_used)(const struct memory_block *m,
		object_callback cb, void *arg);

	/* calculates number of free units, valid only for runs */
	void (*calc_free)(const struct memory_block *m,
		uint32_t *free_space, uint32_t *max_free_block);

	/* this is called exactly once for every existing chunk */
	void (*reinit_chunk)(const struct memory_block *m);

	/*
	 * Initializes bitmap data for a run.
	 * Do *not* use this function unless absolutely necessary, it breaks
	 * the abstraction layer by exposing implementation details.
	 */
	void (*get_bitmap)(const struct memory_block *m, struct run_bitmap *b);

	/* calculates the ratio between occupied and unoccupied space */
	unsigned (*fill_pct)(const struct memory_block *m);
};

struct memory_block {
	uint32_t chunk_id; /* index of the memory block in its zone */
	uint32_t zone_id; /* index of this block's zone in the heap */

	/*
	 * Size index of the memory block, expressed either as a multiple of
	 * CHUNKSIZE in the case of a huge chunk or as a multiple of the run
	 * block size.
	 */
	uint32_t size_idx;

	/*
	 * Used only for run chunks, must be zeroed for huge.
	 * Number of preceding blocks in the chunk, in other words the
	 * position of this memory block in the run bitmap.
	 */
	uint32_t block_off;

	/*
	 * The variables below are associated with the memory block and are
	 * stored here for convenience. These fields are filled by either
	 * memblock_from_offset or memblock_rebuild_state, and they should
	 * not be modified manually.
	 */
	const struct memory_block_ops *m_ops;
	struct palloc_heap *heap;
	enum header_type header_type;
	enum memory_block_type type;
	struct run_bitmap *cached_bitmap;
};
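
/*
 * An illustrative sketch (the helper name is hypothetical, not part of the
 * API): every operation on a block is dispatched through its m_ops virtual
 * function table, regardless of whether the block is huge or a run:
 */
static inline size_t
memblock_example_user_size(const struct memory_block *m)
{
	/* dispatches to the huge- or run-specific implementation */
	return m->m_ops->get_user_size(m);
}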

/*
 * This is a representation of a run memory block that is active in a bucket
 * or is on a pending list in the recycler.
 * This structure should never be passed around by value because the address
 * of the nresv variable can be referenced by reservations made through
 * palloc_reserve(). Only when the number of reservations equals 0 can the
 * structure be moved or freed.
 */
struct memory_block_reserved {
	struct memory_block m;

	struct bucket *bucket;
	/*
	 * Number of reservations made from this run; a pointer to this value
	 * is stored in the user-facing pobj_action structure. Decremented
	 * once the reservation is published or canceled.
	 */
	int nresv;
};
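
/*
 * Illustrative invariant check (a sketch, not library code): the structure
 * may only be recycled once every outstanding reservation has been
 * published or canceled:
 *
 *	if (mb->nresv == 0)
 *		(safe - no pobj_action references &mb->nresv anymore)
 */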

struct memory_block memblock_from_offset(struct palloc_heap *heap,
	uint64_t off);
struct memory_block memblock_from_offset_opt(struct palloc_heap *heap,
	uint64_t off, int size);
void memblock_rebuild_state(struct palloc_heap *heap, struct memory_block *m);
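
/*
 * Typical call pattern (an illustrative sketch; 'heap' and 'off' are
 * assumed to come from the caller's context):
 *
 *	struct memory_block m = memblock_from_offset(heap, off);
 *	size_t usable = m.m_ops->get_user_size(&m);
 */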

struct memory_block memblock_huge_init(struct palloc_heap *heap,
	uint32_t chunk_id, uint32_t zone_id, uint32_t size_idx);

struct memory_block memblock_run_init(struct palloc_heap *heap,
	uint32_t chunk_id, uint32_t zone_id, struct run_descriptor *rdsc);

void memblock_run_bitmap(uint32_t *size_idx, uint16_t flags,
	uint64_t unit_size, uint64_t alignment, void *content,
	struct run_bitmap *b);

#ifdef __cplusplus
}
#endif

#endif