/* $Id: alloc-r0drv-linux.c $ */
/** @file
 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
 */

/*
 * Copyright (C) 2006-2017 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "the-linux-kernel.h"
#include "internal/iprt.h"
#include <iprt/mem.h>

#include <iprt/assert.h>
#include <iprt/err.h>
#include "r0drv/alloc-r0drv.h"


#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
/**
 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
 * memory in the module range. This is preferable to the exec heap below.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * We need memory in the module range (~2GB to ~0), and this can only be
 * obtained through APIs that are not exported (see module_alloc()).
 *
 * So, we'll have to create a quick and dirty heap here using BSS memory.
 * Very annoying and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/err.h>
#endif


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * This is used with allocating executable memory, for things like generated
 * code and loaded modules.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
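
/* Note on the layout (added for clarity): the generic r0drv allocator hands
 * callers the memory immediately following the RTMEMHDR it is given back, so
 * Hdr must be the last member of this structure. pVmArea lets rtR0MemFree
 * recover the vm_struct, and pvDummy pads the structure out to the 32 bytes
 * asserted above. */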
#endif


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The heap. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif


/**
 * API for cleaning up the heap spinlock on IPRT termination.
 * Like RTR0MemExecDonate, this is specific to AMD64 Linux/GNU.
 */
DECLHIDDEN(void) rtR0MemExecCleanup(void)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    RTSpinlockDestroy(g_HeapExecSpinlock);
    g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif
}


/**
 * Donate read+write+execute memory to the exec heap.
 *
 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
 * allocated memory in the module if it wishes for GCC generated code to work.
 * GCC can currently only generate modules that work in the address range
 * ~2GB to ~0.
 *
 * The API accepts only a single donation.
 *
 * @returns IPRT status code.
 * @retval  VERR_NOT_SUPPORTED if the code isn't enabled.
 * @param   pvMemory    Pointer to the memory block.
 * @param   cb          The size of the memory block.
 */
RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    int rc;
    AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);

    rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
    if (RT_SUCCESS(rc))
    {
        rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
        if (RT_FAILURE(rc))
            rtR0MemExecCleanup();
    }
    return rc;
#else
    RT_NOREF_PV(pvMemory); RT_NOREF_PV(cb);
    return VERR_NOT_SUPPORTED;
#endif
}
RT_EXPORT_SYMBOL(RTR0MemExecDonate);
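
/* Illustrative usage sketch (not part of the original file; the buffer name
 * and size are hypothetical). A module would typically donate a static BSS
 * buffer once from its init code:
 *
 *     static uint8_t g_abExecMemory[_1M];
 *     ...
 *     int rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
 *     AssertRC(rc);
 */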


#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocate executable kernel memory in the module range.
 *
 * @returns Pointer to an allocation header on success. NULL on failure.
 *
 * @param   cb          The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPages  = cbAlloc >> PAGE_SHIFT;
    struct page       **papPages;
    struct vm_struct   *pVmArea;
    size_t              iPage;

    pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pVmArea)
        return NULL;
    pVmArea->nr_pages = 0;    /* paranoia? */
    pVmArea->pages    = NULL; /* paranoia? */

    papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
    if (!papPages)
    {
        vunmap(pVmArea->addr);
        return NULL;
    }

    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
        if (!papPages[iPage])
            break;
    }
    if (iPage == cPages)
    {
        /*
         * Map the pages.
         *
         * Not entirely sure we really need to set nr_pages and pages here, but
         * they provide a very convenient place for storing something we need
         * in the free function, if nothing else...
         */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        struct page **papPagesIterator = papPages;
# endif
        pVmArea->nr_pages = cPages;
        pVmArea->pages    = papPages;
        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
                         &papPagesIterator
# else
                         papPages
# endif
                         ))
        {
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
            pHdrEx->pVmArea = pVmArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }
        /* bail out */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        pVmArea->nr_pages = papPagesIterator - papPages;
# endif
    }

    vunmap(pVmArea->addr);

    while (iPage-- > 0)
        __free_page(papPages[iPage]);
    kfree(papPages);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */


/**
 * OS specific allocation function.
 */
DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
{
    PRTMEMHDR pHdr;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Allocate.
     */
    if (fFlags & RTMEMHDR_FLAG_EXEC)
    {
        if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
            return VERR_NOT_SUPPORTED;

#if defined(RT_ARCH_AMD64)
# ifdef RTMEMALLOC_EXEC_HEAP
        if (g_HeapExec != NIL_RTHEAPSIMPLE)
        {
            RTSpinlockAcquire(g_HeapExecSpinlock);
            pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
            RTSpinlockRelease(g_HeapExecSpinlock);
            fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
        }
        else
            pHdr = NULL;

# elif defined(RTMEMALLOC_EXEC_VM_AREA)
        pHdr = rtR0MemAllocExecVmArea(cb);
        fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;

# else  /* !RTMEMALLOC_EXEC_HEAP */
#  error "you do not want to go here..."
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
# endif /* !RTMEMALLOC_EXEC_HEAP */

#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
#else
        pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
#endif
    }
    else
    {
        if (
#if 1 /* vmalloc has serious performance issues, avoid it. */
               cb <= PAGE_SIZE*16 - sizeof(*pHdr)
#else
               cb <= PAGE_SIZE
#endif
            || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
           )
        {
            fFlags |= RTMEMHDR_FLAG_KMALLOC;
            pHdr = kmalloc(cb + sizeof(*pHdr),
                           (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? (GFP_ATOMIC | __GFP_NOWARN)
                                                                  : (GFP_KERNEL | __GFP_NOWARN));
            if (RT_UNLIKELY(   !pHdr
                            && cb > PAGE_SIZE
                            && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
            {
                fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
                pHdr = vmalloc(cb + sizeof(*pHdr));
            }
        }
        else
            pHdr = vmalloc(cb + sizeof(*pHdr));
    }
    if (RT_UNLIKELY(!pHdr))
    {
        IPRT_LINUX_RESTORE_EFL_AC();
        return VERR_NO_MEMORY;
    }

    /*
     * Initialize.
     */
    pHdr->u32Magic = RTMEMHDR_MAGIC;
    pHdr->fFlags   = fFlags;
    pHdr->cb       = cb;
    pHdr->cbReq    = cb;

    *ppHdr = pHdr;
    IPRT_LINUX_RESTORE_EFL_AC();
    return VINF_SUCCESS;
}


/**
 * OS specific free function.
 */
DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
{
    IPRT_LINUX_SAVE_EFL_AC();

    pHdr->u32Magic += 1;
    if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
        kfree(pHdr);
#ifdef RTMEMALLOC_EXEC_HEAP
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
    {
        RTSpinlockAcquire(g_HeapExecSpinlock);
        RTHeapSimpleFree(g_HeapExec, pHdr);
        RTSpinlockRelease(g_HeapExecSpinlock);
    }
#endif
#ifdef RTMEMALLOC_EXEC_VM_AREA
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
    {
        PRTMEMLNXHDREX pHdrEx    = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
        size_t         iPage     = pHdrEx->pVmArea->nr_pages;
        struct page  **papPages  = pHdrEx->pVmArea->pages;
        void          *pvMapping = pHdrEx->pVmArea->addr;

        vunmap(pvMapping);

        while (iPage-- > 0)
            __free_page(papPages[iPage]);
        kfree(papPages);
    }
#endif
    else
        vfree(pHdr);

    IPRT_LINUX_RESTORE_EFL_AC();
}
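
/* For reference (added for clarity), the flag-to-release mapping implemented
 * above is:
 *      RTMEMHDR_FLAG_KMALLOC       -> kfree()
 *      RTMEMHDR_FLAG_EXEC_HEAP     -> RTHeapSimpleFree() on the donated heap
 *      RTMEMHDR_FLAG_EXEC_VM_AREA  -> vunmap() + __free_page() + kfree()
 *      no flag                     -> vfree()
 */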


/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder;
    unsigned long cTmp;

    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~(1 << iOrder))
        ++iOrder;

    return iOrder;
}
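
/* Worked examples (added for clarity): CalcPowerOf2Order(1) == 0,
 * CalcPowerOf2Order(2) == 1, CalcPowerOf2Order(3) == 2, CalcPowerOf2Order(4) == 2
 * and CalcPowerOf2Order(5) == 3, i.e. the smallest order such that
 * (1 << order) >= cPages, which is what alloc_pages() and __free_pages()
 * below expect. */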


/**
 * Allocates physically contiguous memory (below 4GB).
 * The allocation is page aligned and the content is undefined.
 *
 * @returns Pointer to the memory block. This is page aligned.
 * @param   pPhys   Where to store the physical address.
 * @param   cb      The allocation size in bytes. This is always
 *                  rounded up to PAGE_SIZE.
 */
RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
{
    int             cOrder;
    unsigned        cPages;
    struct page    *paPages;
    void           *pvRet;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Validate input.
     */
    Assert(VALID_PTR(pPhys));
    Assert(cb > 0);

    /*
     * Round up the request, compute the order and allocate the pages.
     */
    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
    cPages = cb >> PAGE_SHIFT;
    cOrder = CalcPowerOf2Order(cPages);
#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
    /* ZONE_DMA32: 0-4GB */
    paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
    if (!paPages)
#endif
#ifdef RT_ARCH_AMD64
        /* ZONE_DMA: 0-16MB */
        paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
#else
        /* ZONE_NORMAL: 0-896MB */
        paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
#endif
    if (paPages)
    {
        /*
         * Reserve the pages and mark them executable.
         */
        unsigned iPage;
        for (iPage = 0; iPage < cPages; iPage++)
        {
            Assert(!PageHighMem(&paPages[iPage]));
            if (iPage + 1 < cPages)
            {
                AssertMsg(   (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
                          == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
                          &&    page_to_phys(&paPages[iPage]) + PAGE_SIZE
                             == page_to_phys(&paPages[iPage + 1]),
                          ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
                           (long long)page_to_phys(&paPages[iPage]),     phys_to_virt(page_to_phys(&paPages[iPage])),
                           (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
            }

            SetPageReserved(&paPages[iPage]);
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
            MY_SET_PAGES_EXEC(&paPages[iPage], 1);
#endif
        }
        *pPhys = page_to_phys(paPages);
        pvRet = phys_to_virt(page_to_phys(paPages));
    }
    else
        pvRet = NULL;

    IPRT_LINUX_RESTORE_EFL_AC();
    return pvRet;
}
RT_EXPORT_SYMBOL(RTMemContAlloc);
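
/* Illustrative usage sketch (not part of the original file; names are
 * hypothetical):
 *
 *     RTCCPHYS PhysAddr;
 *     void    *pv = RTMemContAlloc(&PhysAddr, 2 * PAGE_SIZE);
 *     if (pv)
 *     {
 *         // ... hand PhysAddr to a device, access the memory via pv ...
 *         RTMemContFree(pv, 2 * PAGE_SIZE);
 *     }
 */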


/**
 * Frees memory allocated using RTMemContAlloc().
 *
 * @param   pv      Pointer returned by RTMemContAlloc().
 * @param   cb      The cb parameter passed to RTMemContAlloc().
 */
RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
{
    if (pv)
    {
        int          cOrder;
        unsigned     cPages;
        unsigned     iPage;
        struct page *paPages;
        IPRT_LINUX_SAVE_EFL_AC();

        /* validate */
        AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
        Assert(cb > 0);

        /* calc order and get pages */
        cb = RT_ALIGN_Z(cb, PAGE_SIZE);
        cPages = cb >> PAGE_SHIFT;
        cOrder = CalcPowerOf2Order(cPages);
        paPages = virt_to_page(pv);

        /*
         * Restore page attributes, then free the pages.
         */
        for (iPage = 0; iPage < cPages; iPage++)
        {
            ClearPageReserved(&paPages[iPage]);
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
            MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
#endif
        }
        __free_pages(paPages, cOrder);
        IPRT_LINUX_RESTORE_EFL_AC();
    }
}
RT_EXPORT_SYMBOL(RTMemContFree);