// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Contiguous Memory Allocator
 *
 * Copyright (c) 2010-2011 by Samsung Electronics.
 * Copyright IBM Corporation, 2013
 * Copyright LG Electronics Inc., 2014
 * Written by:
 *	Marek Szyprowski <m.szyprowski@samsung.com>
 *	Michal Nazarewicz <mina86@mina86.com>
 *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
 */

#define pr_fmt(fmt) "cma: " fmt

#ifdef CONFIG_CMA_DEBUG
#ifndef DEBUG
# define DEBUG
#endif
#endif
#define CREATE_TRACE_POINTS

#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <trace/events/cma.h>

#include "cma.h"

struct cma cma_areas[MAX_CMA_AREAS];
unsigned cma_area_count;

phys_addr_t cma_get_base(const struct cma *cma)
{
	return PFN_PHYS(cma->base_pfn);
}

unsigned long cma_get_size(const struct cma *cma)
{
	return cma->count << PAGE_SHIFT;
}

const char *cma_get_name(const struct cma *cma)
{
	return cma->name;
}

static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
					     unsigned int align_order)
{
	if (align_order <= cma->order_per_bit)
		return 0;
	return (1UL << (align_order - cma->order_per_bit)) - 1;
}
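
/*
 * Worked example (hypothetical numbers, not taken from a real area): with
 * order_per_bit = 0 and align_order = 2, the mask above is
 * (1UL << 2) - 1 = 3, so the bitmap search only accepts candidate offsets
 * that are multiples of 4 bits, i.e. 4-page alignment.
 */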

/*
 * Find the offset of the base PFN from the specified align_order.
 * The value returned is represented in order_per_bits.
 */
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
					       unsigned int align_order)
{
	return (cma->base_pfn & ((1UL << align_order) - 1))
		>> cma->order_per_bit;
}
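
/*
 * Worked example (hypothetical numbers): for base_pfn = 1032,
 * align_order = 4 and order_per_bit = 0, the offset is
 * (1032 & 15) >> 0 = 8; bit 8 of the bitmap is the first bit whose pfn
 * (1032 + 8 = 1040) is aligned to 16 pages. The offset lets the search
 * align relative to pfn 0 rather than to base_pfn.
 */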

static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
					      unsigned long pages)
{
	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
}
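
/*
 * Worked example (hypothetical numbers): with order_per_bit = 2 each bit
 * covers 4 pages, so a request for 5 pages rounds up to
 * ALIGN(5, 4) >> 2 = 2 bits, i.e. 8 pages of bitmap coverage.
 */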

static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
			     unsigned long count)
{
	unsigned long bitmap_no, bitmap_count;
	unsigned long flags;

	bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	spin_lock_irqsave(&cma->lock, flags);
	bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
	spin_unlock_irqrestore(&cma->lock, flags);
}

static void __init cma_activate_area(struct cma *cma)
{
	unsigned long base_pfn = cma->base_pfn, pfn;
	struct zone *zone;

	cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
	if (!cma->bitmap)
		goto out_error;

	/*
	 * alloc_contig_range() requires the pfn range specified to be in the
	 * same zone. Simplify by forcing the entire CMA resv range to be in the
	 * same zone.
	 */
	WARN_ON_ONCE(!pfn_valid(base_pfn));
	zone = page_zone(pfn_to_page(base_pfn));
	for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) {
		WARN_ON_ONCE(!pfn_valid(pfn));
		if (page_zone(pfn_to_page(pfn)) != zone)
			goto not_in_zone;
	}

	for (pfn = base_pfn; pfn < base_pfn + cma->count;
	     pfn += pageblock_nr_pages)
		init_cma_reserved_pageblock(pfn_to_page(pfn));

	spin_lock_init(&cma->lock);

#ifdef CONFIG_CMA_DEBUGFS
	INIT_HLIST_HEAD(&cma->mem_head);
	spin_lock_init(&cma->mem_head_lock);
#endif

	return;

not_in_zone:
	bitmap_free(cma->bitmap);
out_error:
	/* Expose all pages to the buddy, they are useless for CMA. */
	for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
		free_reserved_page(pfn_to_page(pfn));
	totalcma_pages -= cma->count;
	cma->count = 0;
	pr_err("CMA area %s could not be activated\n", cma->name);
	return;
}

static int __init cma_init_reserved_areas(void)
{
	int i;

	for (i = 0; i < cma_area_count; i++)
		cma_activate_area(&cma_areas[i]);

	return 0;
}
core_initcall(cma_init_reserved_areas);

/**
 * cma_init_reserved_mem() - create custom contiguous area from reserved memory
 * @base: Base address of the reserved area.
 * @size: Size of the reserved area (in bytes).
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @name: The name of the area. If this parameter is NULL, the name of
 *        the area will be set to "cmaN", where N is a running counter of
 *        used areas.
 * @res_cma: Pointer to store the created cma region.
 *
 * This function creates a custom contiguous area from already reserved memory.
 */
int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
				 unsigned int order_per_bit,
				 const char *name,
				 struct cma **res_cma)
{
	struct cma *cma;
	phys_addr_t alignment;

	/* Sanity checks */
	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size || !memblock_is_region_reserved(base, size))
		return -EINVAL;

	/* ensure minimal alignment required by mm core */
	alignment = PAGE_SIZE <<
			max_t(unsigned long, MAX_ORDER - 1, pageblock_order);

	/* alignment should be aligned with order_per_bit */
	if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
		return -EINVAL;

	if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size)
		return -EINVAL;

	/*
	 * Each reserved area must be initialised later, when more kernel
	 * subsystems (like slab allocator) are available.
	 */
	cma = &cma_areas[cma_area_count];

	if (name)
		snprintf(cma->name, CMA_MAX_NAME, "%s", name);
	else
		snprintf(cma->name, CMA_MAX_NAME, "cma%d", cma_area_count);

	cma->base_pfn = PFN_DOWN(base);
	cma->count = size >> PAGE_SHIFT;
	cma->order_per_bit = order_per_bit;
	*res_cma = cma;
	cma_area_count++;
	totalcma_pages += (size / PAGE_SIZE);

	return 0;
}
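
/*
 * Usage sketch (hedged): this mirrors how a reserved-memory node handler
 * such as rmem_cma_setup() in kernel/dma/contiguous.c typically drives
 * this function; the rmem fields below are assumptions for illustration.
 *
 *	struct cma *cma;
 *	int err;
 *
 *	err = cma_init_reserved_mem(rmem->base, rmem->size, 0,
 *				    rmem->name, &cma);
 *	if (err)
 *		return err;
 */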

/**
 * cma_declare_contiguous_nid() - reserve custom contiguous area
 * @base: Base address of the reserved area (optional, use 0 for any).
 * @size: Size of the reserved area (in bytes).
 * @limit: End address of the reserved memory (optional, 0 for any).
 * @alignment: Alignment for the CMA area, should be power of 2 or zero.
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @fixed: hint about where to place the reserved area
 * @name: The name of the area. See function cma_init_reserved_mem().
 * @res_cma: Pointer to store the created cma region.
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node.
 *
 * This function reserves memory from the early allocator. It should be
 * called by arch-specific code once the early allocator (memblock) has been
 * activated and all other subsystems have already allocated/reserved memory.
 * It allows the caller to create custom reserved areas.
 *
 * If @fixed is true, reserve contiguous area at exactly @base. If false,
 * reserve in range from @base to @limit.
 */
int __init cma_declare_contiguous_nid(phys_addr_t base,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid)
{
	phys_addr_t memblock_end = memblock_end_of_DRAM();
	phys_addr_t highmem_start;
	int ret = 0;

	/*
	 * We can't use __pa(high_memory) directly, since high_memory
	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
	 * complain. Find the boundary by adding one to the last valid
	 * address.
	 */
	highmem_start = __pa(high_memory - 1) + 1;
	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
		__func__, &size, &base, &limit, &alignment);

	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size)
		return -EINVAL;

	if (alignment && !is_power_of_2(alignment))
		return -EINVAL;

	/*
	 * Sanitise input arguments.
	 * Pages at both ends of the CMA area could be merged into adjacent
	 * unmovable-migratetype pages by the buddy allocator, in which case
	 * a later contiguous allocation could fail, which is not what we
	 * want. Align the area to rule that out.
	 */
	alignment = max(alignment,  (phys_addr_t)PAGE_SIZE <<
			  max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
	if (fixed && base & (alignment - 1)) {
		ret = -EINVAL;
		pr_err("Region at %pa must be aligned to %pa bytes\n",
			&base, &alignment);
		goto err;
	}
	base = ALIGN(base, alignment);
	size = ALIGN(size, alignment);
	limit &= ~(alignment - 1);

	if (!base)
		fixed = false;

	/* size should be aligned with order_per_bit */
	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
		return -EINVAL;

	/*
	 * If allocating at a fixed base the request region must not cross the
	 * low/high memory boundary.
	 */
	if (fixed && base < highmem_start && base + size > highmem_start) {
		ret = -EINVAL;
		pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
			&base, &highmem_start);
		goto err;
	}

	/*
	 * If the limit is unspecified or above the memblock end, its effective
	 * value will be the memblock end. Set it explicitly to simplify further
	 * checks.
	 */
	if (limit == 0 || limit > memblock_end)
		limit = memblock_end;

	if (base + size > limit) {
		ret = -EINVAL;
		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
			&size, &base, &limit);
		goto err;
	}

	/* Reserve memory */
	if (fixed) {
		if (memblock_is_region_reserved(base, size) ||
		    memblock_reserve(base, size) < 0) {
			ret = -EBUSY;
			goto err;
		}
	} else {
		phys_addr_t addr = 0;

		/*
		 * All pages in the reserved area must come from the same zone.
		 * If the requested region crosses the low/high memory boundary,
		 * try allocating from high memory first and fall back to low
		 * memory in case of failure.
		 */
		if (base < highmem_start && limit > highmem_start) {
			addr = memblock_alloc_range_nid(size, alignment,
					highmem_start, limit, nid, true);
			limit = highmem_start;
		}

		/*
		 * If there is enough memory, try a bottom-up allocation first.
		 * It will place the new cma area close to the start of the node
		 * and guarantee that the compaction is moving pages out of the
		 * cma area and not into it.
		 * Avoid using first 4GB to not interfere with constrained zones
		 * like DMA/DMA32.
		 */
#ifdef CONFIG_PHYS_ADDR_T_64BIT
		if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
			memblock_set_bottom_up(true);
			addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
							limit, nid, true);
			memblock_set_bottom_up(false);
		}
#endif

		if (!addr) {
			addr = memblock_alloc_range_nid(size, alignment, base,
					limit, nid, true);
			if (!addr) {
				ret = -ENOMEM;
				goto err;
			}
		}

		/*
		 * kmemleak scans/reads tracked objects for pointers to other
		 * objects but this address isn't mapped and accessible
		 */
		kmemleak_ignore_phys(addr);
		base = addr;
	}

	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
	if (ret)
		goto free_mem;

	pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
		&base);
	return 0;

free_mem:
	memblock_free(base, size);
err:
	pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
	return ret;
}
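
/*
 * Note (hedged): callers that do not care about NUMA placement typically
 * reach this function through a cma_declare_contiguous() wrapper that
 * passes NUMA_NO_NODE for @nid, along these lines:
 *
 *	ret = cma_declare_contiguous_nid(base, size, limit, alignment,
 *					 order_per_bit, fixed, name,
 *					 res_cma, NUMA_NO_NODE);
 */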

#ifdef CONFIG_CMA_DEBUG
static void cma_debug_show_areas(struct cma *cma)
{
	unsigned long next_zero_bit, next_set_bit, nr_zero;
	unsigned long start = 0;
	unsigned long nr_part, nr_total = 0;
	unsigned long nbits = cma_bitmap_maxno(cma);

	spin_lock_irq(&cma->lock);
	pr_info("number of available pages: ");
	for (;;) {
		next_zero_bit = find_next_zero_bit(cma->bitmap, nbits, start);
		if (next_zero_bit >= nbits)
			break;
		next_set_bit = find_next_bit(cma->bitmap, nbits, next_zero_bit);
		nr_zero = next_set_bit - next_zero_bit;
		nr_part = nr_zero << cma->order_per_bit;
		pr_cont("%s%lu@%lu", nr_total ? "+" : "", nr_part,
			next_zero_bit);
		nr_total += nr_part;
		start = next_zero_bit + nr_zero;
	}
	pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count);
	spin_unlock_irq(&cma->lock);
}
#else
static inline void cma_debug_show_areas(struct cma *cma) { }
#endif
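
/*
 * Example of the debug output above (hypothetical numbers): an area with
 * order_per_bit = 0, two free runs of 16 pages at bit 0 and 8 pages at
 * bit 64, and 1024 pages total would print:
 *
 *	cma: number of available pages: 16@0+8@64=> 24 free of 1024 total pages
 */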

/**
 * cma_alloc() - allocate pages from contiguous area
 * @cma: Contiguous memory region for which the allocation is performed.
 * @count: Requested number of pages.
 * @align: Requested alignment of pages (in PAGE_SIZE order).
 * @no_warn: Avoid printing message about failed allocation.
 *
 * This function allocates part of contiguous memory from the specified
 * contiguous memory area.
 */
struct page *cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, bool no_warn)
{
	unsigned long mask, offset;
	unsigned long pfn = -1;
	unsigned long start = 0;
	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
	unsigned long i;
	struct page *page = NULL;
	int ret = -ENOMEM;

	if (!cma || !cma->count || !cma->bitmap)
		goto out;

	pr_debug("%s(cma %p, count %lu, align %d)\n", __func__, (void *)cma,
		 count, align);

	if (!count)
		goto out;

	trace_cma_alloc_start(cma->name, count, align);

	mask = cma_bitmap_aligned_mask(cma, align);
	offset = cma_bitmap_aligned_offset(cma, align);
	bitmap_maxno = cma_bitmap_maxno(cma);
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	if (bitmap_count > bitmap_maxno)
		goto out;

	for (;;) {
		spin_lock_irq(&cma->lock);
		bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
				bitmap_maxno, start, bitmap_count, mask,
				offset);
		if (bitmap_no >= bitmap_maxno) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
		/*
		 * It's safe to drop the lock here. We've marked this region for
		 * our exclusive use. If the migration fails we will take the
		 * lock again and unmark it.
		 */
		spin_unlock_irq(&cma->lock);

		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
				     GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));

		if (ret == 0) {
			page = pfn_to_page(pfn);
			break;
		}

		cma_clear_bitmap(cma, pfn, count);
		if (ret != -EBUSY)
			break;

		pr_debug("%s(): memory range at %p is busy, retrying\n",
			 __func__, pfn_to_page(pfn));

		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
					   count, align);
		/* try again with a bit different memory target */
		start = bitmap_no + mask + 1;
	}

	trace_cma_alloc_finish(cma->name, pfn, page, count, align);

	/*
	 * CMA can allocate multiple page blocks, which results in different
	 * blocks being marked with different tags. Reset the tags to ignore
	 * those page blocks.
	 */
	if (page) {
		for (i = 0; i < count; i++)
			page_kasan_tag_reset(page + i);
	}

	if (ret && !no_warn) {
		pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
				   __func__, cma->name, count, ret);
		cma_debug_show_areas(cma);
	}

	pr_debug("%s(): returned %p\n", __func__, page);
out:
	if (page) {
		count_vm_event(CMA_ALLOC_SUCCESS);
		cma_sysfs_account_success_pages(cma, count);
	} else {
		count_vm_event(CMA_ALLOC_FAIL);
		if (cma)
			cma_sysfs_account_fail_pages(cma, count);
	}

	return page;
}
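
/*
 * Usage sketch (hedged): a typical consumer pairs cma_alloc() with
 * cma_release(); the cma pointer and sizes below are assumptions for
 * illustration, not taken from a real caller.
 *
 *	struct page *page = cma_alloc(cma, 1 << 4, 4, false);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	cma_release(cma, page, 1 << 4);
 */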

/**
 * cma_release() - release allocated pages
 * @cma: Contiguous memory region for which the allocation is performed.
 * @pages: Allocated pages.
 * @count: Number of allocated pages.
 *
 * This function releases memory allocated by cma_alloc().
 * It returns false when the provided pages do not belong to the contiguous
 * area, and true otherwise.
 */
bool cma_release(struct cma *cma, const struct page *pages,
		 unsigned long count)
{
	unsigned long pfn;

	if (!cma || !pages)
		return false;

	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);

	pfn = page_to_pfn(pages);

	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
		return false;

	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);

	free_contig_range(pfn, count);
	cma_clear_bitmap(cma, pfn, count);
	trace_cma_release(cma->name, pfn, pages, count);

	return true;
}
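
/*
 * Note (hedged): @count must match the count passed to the cma_alloc()
 * call that produced @pages; a smaller count leaks bitmap bits, a larger
 * one clears bits owned by other allocations. The VM_BUG_ON() above only
 * catches releases that run past the end of the area.
 */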

int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
{
	int i;

	for (i = 0; i < cma_area_count; i++) {
		int ret = it(&cma_areas[i], data);

		if (ret)
			return ret;
	}

	return 0;
}