// SPDX-License-Identifier: BSD-3-Clause
/* Copyright 2014-2020, Intel Corporation */

/*
 * mmap.c -- mmap utilities
 */
26 static os_rwlock_t Mmap_list_lock
;
28 static PMDK_SORTEDQ_HEAD(map_list_head
, map_tracker
) Mmap_list
=
29 PMDK_SORTEDQ_HEAD_INITIALIZER(Mmap_list
);
32 * util_mmap_init -- initialize the mmap utils
34 * This is called from the library initialization code.
41 util_rwlock_init(&Mmap_list_lock
);
44 * For testing, allow overriding the default mmap() hint address.
45 * If hint address is defined, it also disables address randomization.
47 char *e
= os_getenv("PMEM_MMAP_HINT");
51 unsigned long long val
= strtoull(e
, &endp
, 16);
53 if (errno
|| endp
== e
) {
54 LOG(2, "Invalid PMEM_MMAP_HINT");
55 } else if (os_access(OS_MAPFILE
, R_OK
)) {
56 LOG(2, "No /proc, PMEM_MMAP_HINT ignored");
58 Mmap_hint
= (void *)val
;
60 LOG(3, "PMEM_MMAP_HINT set to %p", Mmap_hint
);
66 * util_mmap_fini -- clean up the mmap utils
68 * This is called before process stop.
75 util_rwlock_destroy(&Mmap_list_lock
);
79 * util_map -- memory map a file
81 * This is just a convenience function that calls mmap() with the
82 * appropriate arguments and includes our trace points.
85 util_map(int fd
, os_off_t off
, size_t len
, int flags
, int rdonly
,
86 size_t req_align
, int *map_sync
)
88 LOG(3, "fd %d len %zu flags %d rdonly %d req_align %zu map_sync %p",
89 fd
, len
, flags
, rdonly
, req_align
, map_sync
);
92 void *addr
= util_map_hint(len
, req_align
);
93 if (addr
== MAP_FAILED
) {
94 LOG(1, "cannot find a contiguous region of given size");
99 ASSERTeq((uintptr_t)addr
% req_align
, 0);
101 int proto
= rdonly
? PROT_READ
: PROT_READ
|PROT_WRITE
;
102 base
= util_map_sync(addr
, len
, proto
, flags
, fd
, off
, map_sync
);
103 if (base
== MAP_FAILED
) {
104 ERR("!mmap %zu bytes", len
);
108 LOG(3, "mapped at %p", base
);
/*
 * util_unmap -- unmap a file
 *
 * This is just a convenience function that calls munmap() with the
 * appropriate arguments and includes our trace points.
 *
 * Returns munmap()'s result: 0 on success, -1 (errno set) on failure.
 */
int
util_unmap(void *addr, size_t len)
{
	LOG(3, "addr %p len %zu", addr, len);

/*
 * XXX Workaround for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=169608
 */
#ifdef __FreeBSD__
	if (!IS_PAGE_ALIGNED((uintptr_t)addr)) {
		/* FreeBSD accepts unaligned addresses; reject them here */
		errno = EINVAL;
		ERR("!munmap");
		return -1;
	}
#endif
	int retval = munmap(addr, len);
	if (retval < 0)
		ERR("!munmap");

	return retval;
}
142 * util_range_ro -- set a memory range read-only
145 util_range_ro(void *addr
, size_t len
)
147 LOG(3, "addr %p len %zu", addr
, len
);
153 * mprotect requires addr to be a multiple of pagesize, so
154 * adjust addr and len to represent the full 4k chunks
155 * covering the given range.
158 /* increase len by the amount we gain when we round addr down */
159 len
+= (uintptr_t)addr
& (Pagesize
- 1);
161 /* round addr down to page boundary */
162 uptr
= (uintptr_t)addr
& ~(Pagesize
- 1);
164 if ((retval
= mprotect((void *)uptr
, len
, PROT_READ
)) < 0)
165 ERR("!mprotect: PROT_READ");
171 * util_range_rw -- set a memory range read-write
174 util_range_rw(void *addr
, size_t len
)
176 LOG(3, "addr %p len %zu", addr
, len
);
182 * mprotect requires addr to be a multiple of pagesize, so
183 * adjust addr and len to represent the full 4k chunks
184 * covering the given range.
187 /* increase len by the amount we gain when we round addr down */
188 len
+= (uintptr_t)addr
& (Pagesize
- 1);
190 /* round addr down to page boundary */
191 uptr
= (uintptr_t)addr
& ~(Pagesize
- 1);
193 if ((retval
= mprotect((void *)uptr
, len
, PROT_READ
|PROT_WRITE
)) < 0)
194 ERR("!mprotect: PROT_READ|PROT_WRITE");
200 * util_range_none -- set a memory range for no access allowed
203 util_range_none(void *addr
, size_t len
)
205 LOG(3, "addr %p len %zu", addr
, len
);
211 * mprotect requires addr to be a multiple of pagesize, so
212 * adjust addr and len to represent the full 4k chunks
213 * covering the given range.
216 /* increase len by the amount we gain when we round addr down */
217 len
+= (uintptr_t)addr
& (Pagesize
- 1);
219 /* round addr down to page boundary */
220 uptr
= (uintptr_t)addr
& ~(Pagesize
- 1);
222 if ((retval
= mprotect((void *)uptr
, len
, PROT_NONE
)) < 0)
223 ERR("!mprotect: PROT_NONE");
229 * util_range_comparer -- (internal) compares the two mapping trackers
232 util_range_comparer(struct map_tracker
*a
, struct map_tracker
*b
)
234 return ((intptr_t)a
->base_addr
- (intptr_t)b
->base_addr
);
238 * util_range_find_unlocked -- (internal) find the map tracker
239 * for given address range
241 * Returns the first entry at least partially overlapping given range.
242 * It's up to the caller to check whether the entry exactly matches the range,
243 * or if the range spans multiple entries.
245 static struct map_tracker
*
246 util_range_find_unlocked(uintptr_t addr
, size_t len
)
248 LOG(10, "addr 0x%016" PRIxPTR
" len %zu", addr
, len
);
250 uintptr_t end
= addr
+ len
;
252 struct map_tracker
*mt
;
254 PMDK_SORTEDQ_FOREACH(mt
, &Mmap_list
, entry
) {
255 if (addr
< mt
->end_addr
&&
256 (addr
>= mt
->base_addr
|| end
> mt
->base_addr
))
259 /* break if there is no chance to find matching entry */
260 if (addr
< mt
->base_addr
)
270 * util_range_find -- find the map tracker for given address range
271 * the same as util_range_find_unlocked but locked
274 util_range_find(uintptr_t addr
, size_t len
)
276 LOG(10, "addr 0x%016" PRIxPTR
" len %zu", addr
, len
);
278 util_rwlock_rdlock(&Mmap_list_lock
);
280 struct map_tracker
*mt
= util_range_find_unlocked(addr
, len
);
282 util_rwlock_unlock(&Mmap_list_lock
);
287 * util_range_register -- add a memory range into a map tracking list
290 util_range_register(const void *addr
, size_t len
, const char *path
,
291 enum pmem_map_type type
)
293 LOG(3, "addr %p len %zu path %s type %d", addr
, len
, path
, type
);
295 /* check if not tracked already */
296 if (util_range_find((uintptr_t)addr
, len
) != NULL
) {
298 "duplicated persistent memory range; presumably unmapped with munmap() instead of pmem_unmap(): addr %p len %zu",
304 struct map_tracker
*mt
;
305 mt
= Malloc(sizeof(struct map_tracker
));
311 mt
->base_addr
= (uintptr_t)addr
;
312 mt
->end_addr
= mt
->base_addr
+ len
;
314 if (type
== PMEM_DEV_DAX
) {
316 int ret
= util_ddax_region_find(path
, ®ion_id
);
318 ERR("Cannot find DAX device region id");
321 mt
->region_id
= region_id
;
324 util_rwlock_wrlock(&Mmap_list_lock
);
326 PMDK_SORTEDQ_INSERT(&Mmap_list
, mt
, entry
, struct map_tracker
,
327 util_range_comparer
);
329 util_rwlock_unlock(&Mmap_list_lock
);
335 * util_range_split -- (internal) remove or split a map tracking entry
338 util_range_split(struct map_tracker
*mt
, const void *addrp
, const void *endp
)
340 LOG(3, "begin %p end %p", addrp
, endp
);
342 uintptr_t addr
= (uintptr_t)addrp
;
343 uintptr_t end
= (uintptr_t)endp
;
345 if (addr
== end
|| addr
% Mmap_align
!= 0 || end
% Mmap_align
!= 0) {
347 "invalid munmap length, must be non-zero and page aligned");
351 struct map_tracker
*mtb
= NULL
;
352 struct map_tracker
*mte
= NULL
;
356 * xxxxxxxxxxxxx => xxx.......xxxx - mtb+mte
358 * xxxxxxxxxxxxx => xxxxxxx....... - mtb
360 * xxxxxxxxxxxxx => ........xxxxxx - mte
362 * xxxxxxxxxxxxx => .............. - <none>
365 if (addr
> mt
->base_addr
) {
367 /* new mapping at the beginning */
368 mtb
= Malloc(sizeof(struct map_tracker
));
374 mtb
->base_addr
= mt
->base_addr
;
375 mtb
->end_addr
= addr
;
376 mtb
->region_id
= mt
->region_id
;
377 mtb
->type
= mt
->type
;
380 if (end
< mt
->end_addr
) {
382 /* new mapping at the end */
383 mte
= Malloc(sizeof(struct map_tracker
));
389 mte
->base_addr
= end
;
390 mte
->end_addr
= mt
->end_addr
;
391 mte
->region_id
= mt
->region_id
;
392 mte
->type
= mt
->type
;
395 PMDK_SORTEDQ_REMOVE(&Mmap_list
, mt
, entry
);
398 PMDK_SORTEDQ_INSERT(&Mmap_list
, mtb
, entry
,
399 struct map_tracker
, util_range_comparer
);
403 PMDK_SORTEDQ_INSERT(&Mmap_list
, mte
, entry
,
404 struct map_tracker
, util_range_comparer
);
407 /* free entry for the original mapping */
418 * util_range_unregister -- remove a memory range
419 * from map tracking list
421 * Remove the region between [begin,end]. If it's in a middle of the existing
422 * mapping, it results in two new map trackers.
425 util_range_unregister(const void *addr
, size_t len
)
427 LOG(3, "addr %p len %zu", addr
, len
);
431 util_rwlock_wrlock(&Mmap_list_lock
);
434 * Changes in the map tracker list must match the underlying behavior.
437 * The address addr must be a multiple of the page size (but length
438 * need not be). All pages containing a part of the indicated range
441 * This means that we must align the length to the page size.
443 len
= PAGE_ALIGNED_UP_SIZE(len
);
445 void *end
= (char *)addr
+ len
;
447 /* XXX optimize the loop */
448 struct map_tracker
*mt
;
449 while ((mt
= util_range_find_unlocked((uintptr_t)addr
, len
)) != NULL
) {
450 if (util_range_split(mt
, addr
, end
) != 0) {
456 util_rwlock_unlock(&Mmap_list_lock
);
461 * util_range_is_pmem -- return true if entire range
462 * is persistent memory
465 util_range_is_pmem(const void *addrp
, size_t len
)
467 LOG(10, "addr %p len %zu", addrp
, len
);
469 uintptr_t addr
= (uintptr_t)addrp
;
472 util_rwlock_rdlock(&Mmap_list_lock
);
475 struct map_tracker
*mt
= util_range_find(addr
, len
);
477 LOG(4, "address not found 0x%016" PRIxPTR
, addr
);
482 LOG(10, "range found - begin 0x%016" PRIxPTR
483 " end 0x%016" PRIxPTR
,
484 mt
->base_addr
, mt
->end_addr
);
486 if (mt
->base_addr
> addr
) {
487 LOG(10, "base address doesn't match: "
488 "0x%" PRIxPTR
" > 0x%" PRIxPTR
,
489 mt
->base_addr
, addr
);
494 uintptr_t map_len
= mt
->end_addr
- addr
;
501 util_rwlock_unlock(&Mmap_list_lock
);