/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright 2016-2020, Intel Corporation */

/*
 * memblock.h -- internal definitions for memory block
 */

#ifndef LIBPMEMOBJ_MEMBLOCK_H
#define LIBPMEMOBJ_MEMBLOCK_H 1

#include <stddef.h>
#include <stdint.h>

#include "os_thread.h"
#include "heap_layout.h"
#include "memops.h"
#include "palloc.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Sentinel value representing "no memory block". The heap pointer is NULL,
 * which is exactly what MEMORY_BLOCK_IS_NONE() tests for; the enum fields
 * are set to their respective MAX_* out-of-range values.
 */
#define MEMORY_BLOCK_NONE \
(struct memory_block)\
{0, 0, 0, 0, NULL, NULL, MAX_HEADER_TYPES, MAX_MEMORY_BLOCK, NULL}

/* nonzero if the given memory block is the MEMORY_BLOCK_NONE sentinel */
#define MEMORY_BLOCK_IS_NONE(_m)\
((_m).heap == NULL)

/*
 * Two memory blocks are equal when they refer to the same location
 * (zone, chunk, block offset) within the same heap instance.
 */
#define MEMORY_BLOCK_EQUALS(lhs, rhs)\
((lhs).zone_id == (rhs).zone_id && (lhs).chunk_id == (rhs).chunk_id &&\
(lhs).block_off == (rhs).block_off && (lhs).heap == (rhs).heap)

enum memory_block_type {
	/*
	 * Huge memory blocks are directly backed by memory chunks. A single
	 * huge block can consist of several chunks.
	 * The persistent representation of huge memory blocks can be thought
	 * of as a doubly linked list with variable length elements.
	 * That list is stored in the chunk headers array where one element
	 * directly corresponds to one chunk.
	 *
	 * U - used, F - free, R - footer, . - empty
	 * |U| represents a used chunk with a size index of 1, with type
	 * information (CHUNK_TYPE_USED) stored in the corresponding header
	 * array element - chunk_headers[chunk_id].
	 *
	 * |F...R| represents a free chunk with size index of 5. The empty
	 * chunk headers have undefined values and shouldn't be used. All
	 * chunks with size larger than 1 must have a footer in the last
	 * corresponding header array - chunk_headers[chunk_id + size_idx - 1].
	 *
	 * The above representation of chunks will be used to describe the
	 * way fail-safety is achieved during heap operations.
	 *
	 * Allocation of huge memory block with size index 5:
	 * Initial heap state: |U| <> |F..R| <> |U| <> |F......R|
	 *
	 * The only block that matches that size is at very end of the chunks
	 * list: |F......R|
	 *
	 * As the request was for memory block of size 5, and this one's size
	 * is 7 there's a need to first split the chunk in two.
	 * 1) The last chunk header of the new allocation is marked as footer
	 *	and the block after that one is marked as free: |F...RF.R|
	 *	This is allowed and has no impact on the heap because this
	 *	modification is into chunk header that is otherwise unused, in
	 *	other words the linked list didn't change.
	 *
	 * 2) The size index of the first header is changed from previous value
	 *	of 7 to 5: |F...R||F.R|
	 *	This is a single fail-safe atomic operation and this is the
	 *	first change that is noticeable by the heap operations.
	 *	A single linked list element is split into two new ones.
	 *
	 * 3) The allocation process either uses redo log or changes directly
	 *	the chunk header type from free to used: |U...R| <> |F.R|
	 *
	 * In a similar fashion the reverse operation, free, is performed:
	 * Initial heap state: |U| <> |F..R| <> |F| <> |U...R| <> |F.R|
	 *
	 * This is the heap after the previous example with the single chunk
	 * in between changed from used to free.
	 *
	 * 1) Determine the neighbors of the memory block which is being
	 *	freed.
	 *
	 * 2) Update the footer (if needed) information of the last chunk which
	 *	is the memory block being freed or its neighbor to the right.
	 *	|F| <> |U...R| <> |F.R << this one|
	 *
	 * 3) Update the size index and type of the left-most chunk header.
	 *	And so this: |F << this one| <> |U...R| <> |F.R|
	 *	becomes this: |F.......R|
	 *	The entire chunk header can be updated in a single fail-safe
	 *	atomic operation because its size is only 64 bytes.
	 */
	MEMORY_BLOCK_HUGE,
	/*
	 * Run memory blocks are chunks with CHUNK_TYPE_RUN and size index of 1.
	 * The entire chunk is subdivided into smaller blocks and has an
	 * additional metadata attached in the form of a bitmap - each bit
	 * corresponds to a single block.
	 * In this case there's no need to perform any coalescing or splitting
	 * on the persistent metadata.
	 * The bitmap is stored on a variable number of 64 bit values and
	 * because of the requirement of allocation fail-safe atomicity the
	 * maximum size index of a memory block from a run is 64 - since that's
	 * the limit of atomic write guarantee.
	 *
	 * The allocation/deallocation process is a single 8 byte write that
	 * sets/clears the corresponding bits. Depending on the user choice
	 * it can either be made atomically or using redo-log when grouped with
	 * other operations.
	 * It's also important to note that in a case of realloc it might so
	 * happen that a single 8 byte bitmap value has its bits both set and
	 * cleared - that's why the run memory block metadata changes operate
	 * on AND'ing or OR'ing a bitmask instead of directly setting the value.
	 */
	MEMORY_BLOCK_RUN,

	MAX_MEMORY_BLOCK
};

/* allocation state of a memory block, as recorded in persistent metadata */
enum memblock_state {
	MEMBLOCK_STATE_UNKNOWN,
	MEMBLOCK_ALLOCATED,
	MEMBLOCK_FREE,

	MAX_MEMBLOCK_STATE,
};

/* runtime bitmap information for a run */
struct run_bitmap {
	unsigned nvalues; /* number of 8 byte values - size of values array */
	unsigned nbits; /* number of valid bits */

	size_t size; /* total size of the bitmap in bytes */

	uint64_t *values; /* pointer to the bitmap's values array */
};

/* runtime information necessary to create a run */
struct run_descriptor {
	uint16_t flags; /* chunk flags for the run */
	size_t unit_size; /* the size of a single unit in a run */
	uint32_t size_idx; /* size index of a single run instance */
	size_t alignment; /* required alignment of objects */
	unsigned nallocs; /* number of allocs per run */
	struct run_bitmap bitmap; /* bitmap geometry for the run */
};

/*
 * Operations table for memory blocks. Each memory block type
 * (huge/run, see enum memory_block_type) provides its own implementation
 * of this interface; callers dispatch through m->m_ops.
 */
struct memory_block_ops {
	/* returns memory block size */
	size_t (*block_size)(const struct memory_block *m);

	/* prepares header modification operation */
	void (*prep_hdr)(const struct memory_block *m,
		enum memblock_state dest_state, struct operation_context *ctx);

	/* returns lock associated with memory block */
	os_mutex_t *(*get_lock)(const struct memory_block *m);

	/* returns whether a block is allocated or not */
	enum memblock_state (*get_state)(const struct memory_block *m);

	/* returns pointer to the data of a block */
	void *(*get_user_data)(const struct memory_block *m);

	/*
	 * Returns the size of a memory block without overhead.
	 * This is the size of a data block that can be used.
	 */
	size_t (*get_user_size)(const struct memory_block *m);

	/* returns pointer to the beginning of data of a run block */
	void *(*get_real_data)(const struct memory_block *m);

	/* returns the size of a memory block, including headers */
	size_t (*get_real_size)(const struct memory_block *m);

	/* writes a header of an allocation */
	void (*write_header)(const struct memory_block *m,
		uint64_t extra_field, uint16_t flags);

	/*
	 * Invalidates an allocation's header
	 * (NOTE(review): presumably marks the header memory as undefined for
	 * memory checkers - confirm against the implementations).
	 */
	void (*invalidate)(const struct memory_block *m);

	/*
	 * Checks the header type of a chunk matches the expected type and
	 * modifies it if necessary. This is fail-safe atomic.
	 */
	void (*ensure_header_type)(const struct memory_block *m,
		enum header_type t);

	/*
	 * Reinitializes a block after a heap restart.
	 * This is called for EVERY allocation, but *only* under Valgrind.
	 */
	void (*reinit_header)(const struct memory_block *m);

	/* returns the extra field of an allocation */
	uint64_t (*get_extra)(const struct memory_block *m);

	/* returns the flags of an allocation */
	uint16_t (*get_flags)(const struct memory_block *m);

	/* initializes memblock in valgrind */
	void (*vg_init)(const struct memory_block *m, int objects,
		object_callback cb, void *arg);

	/* iterates over every free block */
	int (*iterate_free)(const struct memory_block *m,
		object_callback cb, void *arg);

	/* iterates over every used block */
	int (*iterate_used)(const struct memory_block *m,
		object_callback cb, void *arg);

	/* calculates number of free units, valid only for runs */
	void (*calc_free)(const struct memory_block *m,
		uint32_t *free_space, uint32_t *max_free_block);

	/* this is called exactly once for every existing chunk */
	void (*reinit_chunk)(const struct memory_block *m);

	/*
	 * Initializes bitmap data for a run.
	 * Do *not* use this function unless absolutely necessary, it breaks
	 * the abstraction layer by exposing implementation details.
	 */
	void (*get_bitmap)(const struct memory_block *m, struct run_bitmap *b);

	/* calculates the ratio between occupied and unoccupied space */
	unsigned (*fill_pct)(const struct memory_block *m);
};

/*
 * Runtime handle to a single memory block (huge chunk or run unit).
 * The first four fields identify the block's persistent location; the
 * remaining fields are runtime-only state (see comment below).
 */
struct memory_block {
	uint32_t chunk_id; /* index of the memory block in its zone */
	uint32_t zone_id; /* index of this block zone in the heap */

	/*
	 * Size index of the memory block represented in either multiple of
	 * CHUNKSIZE in the case of a huge chunk or in multiple of a run
	 * block size.
	 */
	uint32_t size_idx;

	/*
	 * Used only for run chunks, must be zeroed for huge.
	 * Number of preceding blocks in the chunk. In other words, the
	 * position of this memory block in run bitmap.
	 */
	uint32_t block_off;

	/*
	 * The variables below are associated with the memory block and are
	 * stored here for convenience. Those fields are filled by either the
	 * memblock_from_offset or memblock_rebuild_state, and they should not
	 * be modified manually.
	 */
	const struct memory_block_ops *m_ops; /* type-specific vtable */
	struct palloc_heap *heap; /* owning heap; NULL == MEMORY_BLOCK_NONE */
	enum header_type header_type;
	enum memory_block_type type; /* huge or run */
	struct run_bitmap *cached_bitmap; /* runs only; avoids recomputation */
};

/*
 * This is a representation of a run memory block that is active in a bucket or
 * is on a pending list in the recycler.
 * This structure should never be passed around by value because the address of
 * the nresv variable can be in reservations made through palloc_reserve(). Only
 * if the number of reservations equals 0 the structure can be moved/freed.
 */
struct memory_block_reserved {
	struct memory_block m; /* the run this reservation state refers to */

	struct bucket *bucket; /* bucket in which the run is currently active */
	/*
	 * Number of reservations made from this run, the pointer to this value
	 * is stored in a user facing pobj_action structure. Decremented once
	 * the reservation is published or canceled.
	 */
	int nresv;
};

/* creates a fully initialized memory block object from a heap offset */
struct memory_block memblock_from_offset(struct palloc_heap *heap,
	uint64_t off);

/*
 * Variant of memblock_from_offset() with an extra flag
 * (NOTE(review): presumably controls whether the offset is adjusted by the
 * allocation header size - confirm with the implementation).
 */
struct memory_block memblock_from_offset_opt(struct palloc_heap *heap,
	uint64_t off, int size);

/*
 * Fills in the runtime-only fields of an already located memory block
 * (the fields documented in struct memory_block as not manually modifiable).
 */
void memblock_rebuild_state(struct palloc_heap *heap, struct memory_block *m);

/* creates a huge memory block representation for the given chunk span */
struct memory_block memblock_huge_init(struct palloc_heap *heap,
	uint32_t chunk_id, uint32_t zone_id, uint32_t size_idx);

/* creates a run memory block representation based on the given descriptor */
struct memory_block memblock_run_init(struct palloc_heap *heap,
	uint32_t chunk_id, uint32_t zone_id, struct run_descriptor *rdsc);

/* calculates the runtime bitmap parameters for a run with given properties */
void memblock_run_bitmap(uint32_t *size_idx, uint16_t flags,
	uint64_t unit_size, uint64_t alignment, void *content,
	struct run_bitmap *b);

#ifdef __cplusplus
}
#endif

#endif /* LIBPMEMOBJ_MEMBLOCK_H */