1 /* $Id: alloc-r0drv-linux.c $ */
3 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
7 * Copyright (C) 2006-2016 Oracle Corporation
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
28 /*********************************************************************************************************************************
30 *********************************************************************************************************************************/
31 #include "the-linux-kernel.h"
32 #include "internal/iprt.h"
35 #include <iprt/assert.h>
37 #include "r0drv/alloc-r0drv.h"
#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
/**
 * From 2.6.23 on, __get_vm_area and map_vm_area can be used to allocate
 * memory in the module range.  This is preferable to the exec heap
 * alternative selected below for older kernels.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * Executable memory has to live in the module range (~2GB to ~0), and that
 * can only be obtained thru APIs that are not exported (see module_alloc()).
 *
 * So, a quick and dirty heap using BSS memory is created here instead.
 * Very annoying and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/err.h>
#endif
66 /*********************************************************************************************************************************
67 * Structures and Typedefs *
68 *********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended allocation header, used for blocks whose generic header is
 * flagged with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * Such blocks hold executable memory, for things like generated code and
 * loaded modules.  The generic header is the last member, and the free
 * routine gets back to this structure from it via RT_FROM_MEMBER.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    /** Set to NULL by the allocator and not read in this file.
     * NOTE(review): presumably padding to reach the asserted size - confirm. */
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
#endif
90 /*********************************************************************************************************************************
92 *********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The exec heap; NIL until RTR0MemExecDonate has initialized it. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap; created by RTR0MemExecDonate and destroyed
 * by rtR0MemExecCleanup. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif
102 * API for cleaning up the heap spinlock on IPRT termination.
103 * This is as RTMemExecDonate specific to AMD64 Linux/GNU.
105 DECLHIDDEN(void) rtR0MemExecCleanup(void)
107 #ifdef RTMEMALLOC_EXEC_HEAP
108 RTSpinlockDestroy(g_HeapExecSpinlock
);
109 g_HeapExecSpinlock
= NIL_RTSPINLOCK
;
115 * Donate read+write+execute memory to the exec heap.
117 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
118 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
119 * allocated memory in the module if it wishes for GCC generated code to work.
120 * GCC can only generate modules that work in the address range ~2GB to ~0
123 * The API only accept one single donation.
125 * @returns IPRT status code.
126 * @retval VERR_NOT_SUPPORTED if the code isn't enabled.
127 * @param pvMemory Pointer to the memory block.
128 * @param cb The size of the memory block.
130 RTR0DECL(int) RTR0MemExecDonate(void *pvMemory
, size_t cb
)
132 #ifdef RTMEMALLOC_EXEC_HEAP
134 AssertReturn(g_HeapExec
== NIL_RTHEAPSIMPLE
, VERR_WRONG_ORDER
);
136 rc
= RTSpinlockCreate(&g_HeapExecSpinlock
, RTSPINLOCK_FLAGS_INTERRUPT_SAFE
, "RTR0MemExecDonate");
139 rc
= RTHeapSimpleInit(&g_HeapExec
, pvMemory
, cb
);
141 rtR0MemExecCleanup();
145 RT_NOREF_PV(pvMemory
); RT_NOREF_PV(cb
);
146 return VERR_NOT_SUPPORTED
;
149 RT_EXPORT_SYMBOL(RTR0MemExecDonate
);
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocates executable kernel memory in the module range.
 *
 * A VM area between MODULES_VADDR and MODULES_END is reserved, backed by
 * individually allocated pages and mapped with PAGE_KERNEL_EXEC.  An
 * RTMEMLNXHDREX is placed at the start of the mapping.
 *
 * @returns Pointer to the allocation header (the Hdr member of the extended
 *          header) on success.  NULL on failure.
 * @param   cb          The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbMapping = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPagesReq = cbMapping >> PAGE_SHIFT;
    size_t              cPagesGot = 0;
    struct vm_struct   *pArea;
    struct page       **papBacking;

    /* Reserve the address space. */
    pArea = __get_vm_area(cbMapping, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pArea)
        return NULL;
    pArea->nr_pages = 0;    /* paranoia? */
    pArea->pages    = NULL; /* paranoia? */

    /* Array of page pointers; on success it is kept and released by rtR0MemFree. */
    papBacking = (struct page **)kmalloc(cPagesReq * sizeof(papBacking[0]), GFP_KERNEL | __GFP_NOWARN);
    if (!papBacking)
    {
        vunmap(pArea->addr);
        return NULL;
    }

    /* Allocate the backing pages one at a time, stopping at the first failure. */
    while (cPagesGot < cPagesReq)
    {
        papBacking[cPagesGot] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
        if (!papBacking[cPagesGot])
            break;
        cPagesGot++;
    }

    if (cPagesGot == cPagesReq)
    {
        /*
         * Map the pages.
         *
         * It is not certain that nr_pages and pages really have to be set
         * here, but they are a very convenient place for keeping what the
         * free function needs, if nothing else...
         */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        struct page **papCursor = papBacking;
# endif
        pArea->nr_pages = cPagesReq;
        pArea->pages    = papBacking;
        if (!map_vm_area(pArea, PAGE_KERNEL_EXEC,
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
                         &papCursor
# else
                         papBacking
# endif
                         ))
        {
            /* Success: the extended header sits at the very start of the mapping. */
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pArea->addr;
            pHdrEx->pVmArea = pArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }

        /* bail out */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        pArea->nr_pages = papCursor - papBacking;
# endif
    }

    /* Failure: undo everything done so far, pages in reverse order. */
    vunmap(pArea->addr);

    while (cPagesGot-- > 0)
        __free_page(papBacking[cPagesGot]);
    kfree(papBacking);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */
233 * OS specific allocation function.
235 DECLHIDDEN(int) rtR0MemAllocEx(size_t cb
, uint32_t fFlags
, PRTMEMHDR
*ppHdr
)
238 IPRT_LINUX_SAVE_EFL_AC();
243 if (fFlags
& RTMEMHDR_FLAG_EXEC
)
245 if (fFlags
& RTMEMHDR_FLAG_ANY_CTX
)
246 return VERR_NOT_SUPPORTED
;
248 #if defined(RT_ARCH_AMD64)
249 # ifdef RTMEMALLOC_EXEC_HEAP
250 if (g_HeapExec
!= NIL_RTHEAPSIMPLE
)
252 RTSpinlockAcquire(g_HeapExecSpinlock
);
253 pHdr
= (PRTMEMHDR
)RTHeapSimpleAlloc(g_HeapExec
, cb
+ sizeof(*pHdr
), 0);
254 RTSpinlockRelease(g_HeapExecSpinlock
);
255 fFlags
|= RTMEMHDR_FLAG_EXEC_HEAP
;
260 # elif defined(RTMEMALLOC_EXEC_VM_AREA)
261 pHdr
= rtR0MemAllocExecVmArea(cb
);
262 fFlags
|= RTMEMHDR_FLAG_EXEC_VM_AREA
;
264 # else /* !RTMEMALLOC_EXEC_HEAP */
265 # error "you don not want to go here..."
266 pHdr
= (PRTMEMHDR
)__vmalloc(cb
+ sizeof(*pHdr
), GFP_KERNEL
| __GFP_HIGHMEM
| __GFP_NOWARN
, MY_PAGE_KERNEL_EXEC
);
267 # endif /* !RTMEMALLOC_EXEC_HEAP */
269 #elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
270 pHdr
= (PRTMEMHDR
)__vmalloc(cb
+ sizeof(*pHdr
), GFP_KERNEL
| __GFP_HIGHMEM
| __GFP_NOWARN
, MY_PAGE_KERNEL_EXEC
);
272 pHdr
= (PRTMEMHDR
)vmalloc(cb
+ sizeof(*pHdr
));
278 #if 1 /* vmalloc has serious performance issues, avoid it. */
279 cb
<= PAGE_SIZE
*16 - sizeof(*pHdr
)
283 || (fFlags
& RTMEMHDR_FLAG_ANY_CTX
)
286 fFlags
|= RTMEMHDR_FLAG_KMALLOC
;
287 pHdr
= kmalloc(cb
+ sizeof(*pHdr
),
288 (fFlags
& RTMEMHDR_FLAG_ANY_CTX_ALLOC
) ? (GFP_ATOMIC
| __GFP_NOWARN
)
289 : (GFP_KERNEL
| __GFP_NOWARN
));
290 if (RT_UNLIKELY( !pHdr
292 && !(fFlags
& RTMEMHDR_FLAG_ANY_CTX
) ))
294 fFlags
&= ~RTMEMHDR_FLAG_KMALLOC
;
295 pHdr
= vmalloc(cb
+ sizeof(*pHdr
));
299 pHdr
= vmalloc(cb
+ sizeof(*pHdr
));
301 if (RT_UNLIKELY(!pHdr
))
303 IPRT_LINUX_RESTORE_EFL_AC();
304 return VERR_NO_MEMORY
;
310 pHdr
->u32Magic
= RTMEMHDR_MAGIC
;
311 pHdr
->fFlags
= fFlags
;
316 IPRT_LINUX_RESTORE_EFL_AC();
322 * OS specific free function.
324 DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr
)
326 IPRT_LINUX_SAVE_EFL_AC();
329 if (pHdr
->fFlags
& RTMEMHDR_FLAG_KMALLOC
)
331 #ifdef RTMEMALLOC_EXEC_HEAP
332 else if (pHdr
->fFlags
& RTMEMHDR_FLAG_EXEC_HEAP
)
334 RTSpinlockAcquire(g_HeapExecSpinlock
);
335 RTHeapSimpleFree(g_HeapExec
, pHdr
);
336 RTSpinlockRelease(g_HeapExecSpinlock
);
339 #ifdef RTMEMALLOC_EXEC_VM_AREA
340 else if (pHdr
->fFlags
& RTMEMHDR_FLAG_EXEC_VM_AREA
)
342 PRTMEMLNXHDREX pHdrEx
= RT_FROM_MEMBER(pHdr
, RTMEMLNXHDREX
, Hdr
);
343 size_t iPage
= pHdrEx
->pVmArea
->nr_pages
;
344 struct page
**papPages
= pHdrEx
->pVmArea
->pages
;
345 void *pvMapping
= pHdrEx
->pVmArea
->addr
;
350 __free_page(papPages
[iPage
]);
357 IPRT_LINUX_RESTORE_EFL_AC();
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * The result is the smallest order for which 2^order >= cPages, i.e. the
 * base-2 logarithm of cPages rounded up.  0 is returned for both 0 and 1.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder = 0;
    unsigned long cTmp;

    /* Find the index of the most significant set bit. */
    for (cTmp = cPages >> 1; cTmp != 0; cTmp >>= 1)
        iOrder++;

    /* Round up if any bit below it is set, i.e. cPages isn't a power of two.
       The mask must be built in unsigned long: shifting a plain int is
       undefined once iOrder reaches 31 and gave wrong orders for page counts
       of 2^32 and above. */
    if (cPages & ~(1UL << iOrder))
        iOrder++;

    return iOrder;
}
383 * Allocates physical contiguous memory (below 4GB).
384 * The allocation is page aligned and the content is undefined.
386 * @returns Pointer to the memory block. This is page aligned.
387 * @param pPhys Where to store the physical address.
388 * @param cb The allocation size in bytes. This is always
389 * rounded up to PAGE_SIZE.
391 RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys
, size_t cb
)
395 struct page
*paPages
;
397 IPRT_LINUX_SAVE_EFL_AC();
402 Assert(VALID_PTR(pPhys
));
406 * Allocate page pointer array.
408 cb
= RT_ALIGN_Z(cb
, PAGE_SIZE
);
409 cPages
= cb
>> PAGE_SHIFT
;
410 cOrder
= CalcPowerOf2Order(cPages
);
411 #if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
412 /* ZONE_DMA32: 0-4GB */
413 paPages
= alloc_pages(GFP_DMA32
| __GFP_NOWARN
, cOrder
);
417 /* ZONE_DMA; 0-16MB */
418 paPages
= alloc_pages(GFP_DMA
| __GFP_NOWARN
, cOrder
);
420 /* ZONE_NORMAL: 0-896MB */
421 paPages
= alloc_pages(GFP_USER
| __GFP_NOWARN
, cOrder
);
426 * Reserve the pages and mark them executable.
429 for (iPage
= 0; iPage
< cPages
; iPage
++)
431 Assert(!PageHighMem(&paPages
[iPage
]));
432 if (iPage
+ 1 < cPages
)
434 AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages
[iPage
])) + PAGE_SIZE
435 == (uintptr_t)phys_to_virt(page_to_phys(&paPages
[iPage
+ 1]))
436 && page_to_phys(&paPages
[iPage
]) + PAGE_SIZE
437 == page_to_phys(&paPages
[iPage
+ 1]),
438 ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage
, cPages
,
439 (long long)page_to_phys(&paPages
[iPage
]), phys_to_virt(page_to_phys(&paPages
[iPage
])),
440 (long long)page_to_phys(&paPages
[iPage
+ 1]), phys_to_virt(page_to_phys(&paPages
[iPage
+ 1])) ));
443 SetPageReserved(&paPages
[iPage
]);
444 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
445 MY_SET_PAGES_EXEC(&paPages
[iPage
], 1);
448 *pPhys
= page_to_phys(paPages
);
449 pvRet
= phys_to_virt(page_to_phys(paPages
));
454 IPRT_LINUX_RESTORE_EFL_AC();
457 RT_EXPORT_SYMBOL(RTMemContAlloc
);
461 * Frees memory allocated using RTMemContAlloc().
463 * @param pv Pointer to return from RTMemContAlloc().
464 * @param cb The cb parameter passed to RTMemContAlloc().
466 RTR0DECL(void) RTMemContFree(void *pv
, size_t cb
)
473 struct page
*paPages
;
474 IPRT_LINUX_SAVE_EFL_AC();
477 AssertMsg(!((uintptr_t)pv
& PAGE_OFFSET_MASK
), ("pv=%p\n", pv
));
480 /* calc order and get pages */
481 cb
= RT_ALIGN_Z(cb
, PAGE_SIZE
);
482 cPages
= cb
>> PAGE_SHIFT
;
483 cOrder
= CalcPowerOf2Order(cPages
);
484 paPages
= virt_to_page(pv
);
487 * Restore page attributes freeing the pages.
489 for (iPage
= 0; iPage
< cPages
; iPage
++)
491 ClearPageReserved(&paPages
[iPage
]);
492 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
493 MY_SET_PAGES_NOEXEC(&paPages
[iPage
], 1);
496 __free_pages(paPages
, cOrder
);
497 IPRT_LINUX_RESTORE_EFL_AC();
500 RT_EXPORT_SYMBOL(RTMemContFree
);