[mirror_ubuntu-jammy-kernel.git] / mm / hugetlb_vmemmap.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Free some vmemmap pages of HugeTLB
 *
 * Copyright (c) 2020, Bytedance. All rights reserved.
 *
 *     Author: Muchun Song <songmuchun@bytedance.com>
 *
 * The struct page structures (page structs) are used to describe a physical
 * page frame. By default, there is a one-to-one mapping from a page frame to
 * its corresponding page struct.
 *
 * HugeTLB pages consist of multiple base page size pages and are supported by
 * many architectures. See hugetlbpage.rst in the Documentation directory for
 * more details. On the x86-64 architecture, HugeTLB pages of size 2MB and 1GB
 * are currently supported. Since the base page size on x86 is 4KB, a 2MB
 * HugeTLB page consists of 512 base pages and a 1GB HugeTLB page consists of
 * 262144 base pages. For each base page, there is a corresponding page struct.
 *
 * Within the HugeTLB subsystem, only the first 4 page structs are used to
 * contain unique information about a HugeTLB page. __NR_USED_SUBPAGE provides
 * this upper limit. The only 'useful' information in the remaining page structs
 * is the compound_head field, and this field is the same for all tail pages.
 *
 * By removing redundant page structs for HugeTLB pages, memory can be returned
 * to the buddy allocator for other uses.
 *
 * Different architectures support different HugeTLB pages. For example, the
 * following table shows the HugeTLB page sizes supported by the x86 and arm64
 * architectures. Because arm64 supports 4k, 16k, and 64k base pages as well as
 * contiguous entries, it supports many different HugeTLB page sizes.
 *
 * +--------------+-----------+-----------------------------------------------+
 * | Architecture | Page Size |                HugeTLB Page Size              |
 * +--------------+-----------+-----------+-----------+-----------+-----------+
 * |    x86-64    |    4KB    |    2MB    |    1GB    |           |           |
 * +--------------+-----------+-----------+-----------+-----------+-----------+
 * |              |    4KB    |   64KB    |    2MB    |    32MB   |    1GB    |
 * |              +-----------+-----------+-----------+-----------+-----------+
 * |    arm64     |   16KB    |    2MB    |   32MB    |     1GB   |           |
 * |              +-----------+-----------+-----------+-----------+-----------+
 * |              |   64KB    |    2MB    |  512MB    |    16GB   |           |
 * +--------------+-----------+-----------+-----------+-----------+-----------+
 *
 * When the system boots up, every HugeTLB page has more than one struct page
 * structure, and their total size is (unit: pages):
 *
 *    struct_size = HugeTLB_Size / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
 *
 * Where HugeTLB_Size is the size of the HugeTLB page. We know that the size
 * of the HugeTLB page is always n times PAGE_SIZE. So we can get the following
 * relationship.
 *
 *    HugeTLB_Size = n * PAGE_SIZE
 *
 * Then,
 *
 *    struct_size = n * PAGE_SIZE / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
 *                = n * sizeof(struct page) / PAGE_SIZE
 *
 * We can use huge mapping at the pud/pmd level for the HugeTLB page.
 *
 * For the HugeTLB page of the pmd level mapping, then
 *
 *    struct_size = n * sizeof(struct page) / PAGE_SIZE
 *                = PAGE_SIZE / sizeof(pte_t) * sizeof(struct page) / PAGE_SIZE
 *                = sizeof(struct page) / sizeof(pte_t)
 *                = 64 / 8
 *                = 8 (pages)
 *
 * Where n is how many pte entries one page can contain. So the value of
 * n is (PAGE_SIZE / sizeof(pte_t)).
 *
 * This optimization only supports 64-bit systems, so the value of
 * sizeof(pte_t) is 8. This optimization is also applicable only when the size
 * of struct page is a power of two. In most cases, the size of struct page is
 * 64 bytes (e.g. x86-64 and arm64). So if we use pmd level mapping for a
 * HugeTLB page, its struct page structs occupy 8 page frames, the size of
 * which depends on the size of the base page.
 *
 * For the HugeTLB page of the pud level mapping, then
 *
 *    struct_size = PAGE_SIZE / sizeof(pmd_t) * struct_size(pmd)
 *                = PAGE_SIZE / 8 * 8 (pages)
 *                = PAGE_SIZE (pages)
 *
 * Where the struct_size(pmd) is the size of the struct page structs of a
 * HugeTLB page of the pmd level mapping.
 *
 * E.g.: The struct page structs of a 2MB HugeTLB page on x86_64 occupy 8 page
 * frames, while those of a 1GB HugeTLB page occupy 4096.
 *
 * Next, we take the pmd level mapping of the HugeTLB page as an example to
 * show the internal implementation of this optimization. There are 8 pages of
 * struct page structs associated with a HugeTLB page which is pmd mapped.
 *
 * Here is how things look before optimization.
 *
 *    HugeTLB                  struct pages(8 pages)         page frame(8 pages)
 * +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
 * |           |                     |     0     | -------------> |     0     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     1     | -------------> |     1     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     2     | -------------> |     2     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     3     | -------------> |     3     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     4     | -------------> |     4     |
 * |    PMD    |                     +-----------+                +-----------+
 * |   level   |                     |     5     | -------------> |     5     |
 * |  mapping  |                     +-----------+                +-----------+
 * |           |                     |     6     | -------------> |     6     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     7     | -------------> |     7     |
 * |           |                     +-----------+                +-----------+
 * |           |
 * |           |
 * |           |
 * +-----------+
 *
 * The value of page->compound_head is the same for all tail pages. The first
 * page of page structs (page 0) associated with the HugeTLB page contains the 4
 * page structs necessary to describe the HugeTLB. The only use of the remaining
 * pages of page structs (page 1 to page 7) is to point to page->compound_head.
 * Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs
 * will be used for each HugeTLB page. This will allow us to free the remaining
 * 6 pages to the buddy allocator.
 *
 * Here is how things look after remapping.
 *
 *    HugeTLB                  struct pages(8 pages)         page frame(8 pages)
 * +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
 * |           |                     |     0     | -------------> |     0     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     1     | -------------> |     1     |
 * |           |                     +-----------+                +-----------+
 * |           |                     |     2     | ----------------^ ^ ^ ^ ^ ^
 * |           |                     +-----------+                   | | | | |
 * |           |                     |     3     | ------------------+ | | | |
 * |           |                     +-----------+                     | | | |
 * |           |                     |     4     | --------------------+ | | |
 * |    PMD    |                     +-----------+                       | | |
 * |   level   |                     |     5     | ----------------------+ | |
 * |  mapping  |                     +-----------+                         | |
 * |           |                     |     6     | ------------------------+ |
 * |           |                     +-----------+                           |
 * |           |                     |     7     | --------------------------+
 * |           |                     +-----------+
 * |           |
 * |           |
 * |           |
 * +-----------+
 *
 * When a HugeTLB is freed to the buddy system, we should allocate 6 pages for
 * vmemmap pages and restore the previous mapping relationship.
 *
 * The HugeTLB page of the pud level mapping is similar to the former: we can
 * also use this approach to free (PAGE_SIZE - 2) vmemmap pages.
 *
 * Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
 * (e.g. aarch64) provide a contiguous bit in the translation table entries
 * that hints to the MMU to indicate that it is one of a contiguous set of
 * entries that can be cached in a single TLB entry.
 *
 * The contiguous bit is used to increase the mapping size at the pmd and pte
 * (last) level. So this type of HugeTLB page can be optimized only when the
 * size of its struct page structs is greater than 2 pages.
 */
#include "hugetlb_vmemmap.h"

/*
 * There are a lot of struct page structures associated with each HugeTLB page.
 * For tail pages, the value of compound_head is the same. So we can reuse the
 * first page of tail page structures. We map the virtual addresses of the
 * remaining pages of tail page structures to the first page of tail page
 * structures, and then free these page frames. Therefore, we need to reserve
 * two pages as vmemmap areas.
 *
 * RESERVE_VMEMMAP_NR is that reserved page count. RESERVE_VMEMMAP_SIZE is the
 * same count shifted left by PAGE_SHIFT; it is added to a vmemmap virtual
 * address below to skip over the reserved pages.
 */
#define RESERVE_VMEMMAP_NR		2U
#define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)

/*
 * Number of vmemmap pages associated with one HugeTLB page of hstate @h that
 * can be handed back to the buddy allocator.
 *
 * Todo: This is hard-wired to zero for now, which keeps the feature disabled.
 * It will be enabled once all the infrastructure is in place.
 */
static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
{
	/* Feature still disabled: no vmemmap page may be freed. */
	const unsigned int nr_freeable = 0;

	return nr_freeable;
}

/*
 * Same quantity as free_vmemmap_pages_per_hpage(), but scaled from a page
 * count by PAGE_SHIFT. The count is widened to unsigned long before shifting.
 */
static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h)
{
	unsigned long nr_pages = free_vmemmap_pages_per_hpage(h);

	return nr_pages << PAGE_SHIFT;
}

/*
 * Free the redundant vmemmap pages backing the struct pages of a HugeTLB page.
 *
 * @h:    hstate the HugeTLB page belongs to.
 * @head: head struct page of the HugeTLB page; its address is used as the
 *        start of the vmemmap virtual address range.
 *
 * Does nothing when free_vmemmap_pages_per_hpage() reports that no page can
 * be freed for @h.
 */
void free_huge_page_vmemmap(struct hstate *h, struct page *head)
{
	unsigned long start, end, reuse;

	if (free_vmemmap_pages_per_hpage(h) == 0)
		return;

	/* Skip the reserved vmemmap pages; they stay mapped as they are. */
	start = (unsigned long)head + RESERVE_VMEMMAP_SIZE;
	end = start + free_vmemmap_pages_size_per_hpage(h);
	/* The last reserved page is the one every freed address is remapped to. */
	reuse = start - PAGE_SIZE;

	/*
	 * Remap the vmemmap virtual address range [@start, @end) to the page
	 * which @reuse is mapped to, then free the pages which the range
	 * [@start, @end) was previously mapped to.
	 */
	vmemmap_remap_free(start, end, reuse);
}
Commit	Line	Data
f41f2ed4 MS	1	// SPDX-License-Identifier: GPL-2.0
	2	/*
	3	* Free some vmemmap pages of HugeTLB
	4	*
	5	* Copyright (c) 2020, Bytedance. All rights reserved.
	6	*
	7	* Author: Muchun Song <songmuchun@bytedance.com>
	8	*
	9	* The struct page structures (page structs) are used to describe a physical
	10	* page frame. By default, there is a one-to-one mapping from a page frame to
	11	* it's corresponding page struct.
	12	*
	13	* HugeTLB pages consist of multiple base page size pages and is supported by
	14	* many architectures. See hugetlbpage.rst in the Documentation directory for
	15	* more details. On the x86-64 architecture, HugeTLB pages of size 2MB and 1GB
	16	* are currently supported. Since the base page size on x86 is 4KB, a 2MB
	17	* HugeTLB page consists of 512 base pages and a 1GB HugeTLB page consists of
	18	* 262144 base pages. For each base page, there is a corresponding page struct.
	19	*
	20	* Within the HugeTLB subsystem, only the first 4 page structs are used to
	21	* contain unique information about a HugeTLB page. __NR_USED_SUBPAGE provides
	22	* this upper limit. The only 'useful' information in the remaining page structs
	23	* is the compound_head field, and this field is the same for all tail pages.
	24	*
	25	* By removing redundant page structs for HugeTLB pages, memory can be returned
	26	* to the buddy allocator for other uses.
	27	*
	28	* Different architectures support different HugeTLB pages. For example, the
	29	* following table is the HugeTLB page size supported by x86 and arm64
	30	* architectures. Because arm64 supports 4k, 16k, and 64k base pages and
	31	* supports contiguous entries, so it supports many kinds of sizes of HugeTLB
	32	* page.
	33	*
	34	* +--------------+-----------+-----------------------------------------------+
	35	* \| Architecture \| Page Size \| HugeTLB Page Size \|
	36	* +--------------+-----------+-----------+-----------+-----------+-----------+
	37	* \| x86-64 \| 4KB \| 2MB \| 1GB \| \| \|
	38	* +--------------+-----------+-----------+-----------+-----------+-----------+
	39	* \| \| 4KB \| 64KB \| 2MB \| 32MB \| 1GB \|
	40	* \| +-----------+-----------+-----------+-----------+-----------+
	41	* \| arm64 \| 16KB \| 2MB \| 32MB \| 1GB \| \|
	42	* \| +-----------+-----------+-----------+-----------+-----------+
	43	* \| \| 64KB \| 2MB \| 512MB \| 16GB \| \|
	44	* +--------------+-----------+-----------+-----------+-----------+-----------+
	45	*
	46	* When the system boot up, every HugeTLB page has more than one struct page
	47	* structs which size is (unit: pages):
	48	*
	49	* struct_size = HugeTLB_Size / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
	50	*
	51	* Where HugeTLB_Size is the size of the HugeTLB page. We know that the size
	52	* of the HugeTLB page is always n times PAGE_SIZE. So we can get the following
	53	* relationship.
	54	*
	55	* HugeTLB_Size = n * PAGE_SIZE
	56	*
	57	* Then,
	58	*
	59	* struct_size = n * PAGE_SIZE / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
	60	* = n * sizeof(struct page) / PAGE_SIZE
	61	*
	62	* We can use huge mapping at the pud/pmd level for the HugeTLB page.
	63	*
	64	* For the HugeTLB page of the pmd level mapping, then
65	*
66	* struct_size = n * sizeof(struct page) / PAGE_SIZE
67	* = PAGE_SIZE / sizeof(pte_t) * sizeof(struct page) / PAGE_SIZE
68	* = sizeof(struct page) / sizeof(pte_t)
69	* = 64 / 8
70	* = 8 (pages)
71	*
72	* Where n is how many pte entries which one page can contains. So the value of
73	* n is (PAGE_SIZE / sizeof(pte_t)).
74	*
75	* This optimization only supports 64-bit system, so the value of sizeof(pte_t)
76	* is 8. And this optimization also applicable only when the size of struct page
77	* is a power of two. In most cases, the size of struct page is 64 bytes (e.g.
78	* x86-64 and arm64). So if we use pmd level mapping for a HugeTLB page, the
79	* size of struct page structs of it is 8 page frames which size depends on the
80	* size of the base page.
81	*
82	* For the HugeTLB page of the pud level mapping, then
83	*
84	* struct_size = PAGE_SIZE / sizeof(pmd_t) * struct_size(pmd)
85	* = PAGE_SIZE / 8 * 8 (pages)
86	* = PAGE_SIZE (pages)
87	*
88	* Where the struct_size(pmd) is the size of the struct page structs of a
89	* HugeTLB page of the pmd level mapping.
90	*
91	* E.g.: A 2MB HugeTLB page on x86_64 consists in 8 page frames while 1GB
92	* HugeTLB page consists in 4096.
93	*
94	* Next, we take the pmd level mapping of the HugeTLB page as an example to
95	* show the internal implementation of this optimization. There are 8 pages
96	* struct page structs associated with a HugeTLB page which is pmd mapped.
97	*
98	* Here is how things look before optimization.
99	*
100	* HugeTLB struct pages(8 pages) page frame(8 pages)
101	* +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
102	* \| \| \| 0 \| -------------> \| 0 \|
103	* \| \| +-----------+ +-----------+
104	* \| \| \| 1 \| -------------> \| 1 \|
105	* \| \| +-----------+ +-----------+
106	* \| \| \| 2 \| -------------> \| 2 \|
107	* \| \| +-----------+ +-----------+
108	* \| \| \| 3 \| -------------> \| 3 \|
109	* \| \| +-----------+ +-----------+
110	* \| \| \| 4 \| -------------> \| 4 \|
111	* \| PMD \| +-----------+ +-----------+
112	* \| level \| \| 5 \| -------------> \| 5 \|
113	* \| mapping \| +-----------+ +-----------+
114	* \| \| \| 6 \| -------------> \| 6 \|
115	* \| \| +-----------+ +-----------+
116	* \| \| \| 7 \| -------------> \| 7 \|
117	* \| \| +-----------+ +-----------+
118	* \| \|
119	* \| \|
120	* \| \|
121	* +-----------+
122	*
123	* The value of page->compound_head is the same for all tail pages. The first
124	* page of page structs (page 0) associated with the HugeTLB page contains the 4
125	* page structs necessary to describe the HugeTLB. The only use of the remaining
126	* pages of page structs (page 1 to page 7) is to point to page->compound_head.
127	* Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs
128	* will be used for each HugeTLB page. This will allow us to free the remaining
129	* 6 pages to the buddy allocator.
130	*
131	* Here is how things look after remapping.
132	*
133	* HugeTLB struct pages(8 pages) page frame(8 pages)
134	* +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
135	* \| \| \| 0 \| -------------> \| 0 \|
136	* \| \| +-----------+ +-----------+
137	* \| \| \| 1 \| -------------> \| 1 \|
138	* \| \| +-----------+ +-----------+
139	* \| \| \| 2 \| ----------------^ ^ ^ ^ ^ ^
140	* \| \| +-----------+ \| \| \| \| \|
141	* \| \| \| 3 \| ------------------+ \| \| \| \|
142	* \| \| +-----------+ \| \| \| \|
143	* \| \| \| 4 \| --------------------+ \| \| \|
144	* \| PMD \| +-----------+ \| \| \|
145	* \| level \| \| 5 \| ----------------------+ \| \|
146	* \| mapping \| +-----------+ \| \|
147	* \| \| \| 6 \| ------------------------+ \|
148	* \| \| +-----------+ \|
149	* \| \| \| 7 \| --------------------------+
150	* \| \| +-----------+
151	* \| \|
152	* \| \|
153	* \| \|
154	* +-----------+
155	*
156	* When a HugeTLB is freed to the buddy system, we should allocate 6 pages for
157	* vmemmap pages and restore the previous mapping relationship.
158	*
159	* For the HugeTLB page of the pud level mapping. It is similar to the former.
160	* We also can use this approach to free (PAGE_SIZE - 2) vmemmap pages.
161	*
162	* Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
163	* (e.g. aarch64) provides a contiguous bit in the translation table entries
164	* that hints to the MMU to indicate that it is one of a contiguous set of
165	* entries that can be cached in a single TLB entry.
166	*
167	* The contiguous bit is used to increase the mapping size at the pmd and pte
168	* (last) level. So this type of HugeTLB page can be optimized only when its
169	* size of the struct page structs is greater than 2 pages.
170	*/
171	#include "hugetlb_vmemmap.h"
172
173	/*
174	* There are a lot of struct page structures associated with each HugeTLB page.
175	* For tail pages, the value of compound_head is the same. So we can reuse first
176	* page of tail page structures. We map the virtual addresses of the remaining
177	* pages of tail page structures to the first tail page struct, and then free
178	* these page frames. Therefore, we need to reserve two pages as vmemmap areas.
179	*/
180	#define RESERVE_VMEMMAP_NR 2U
181	#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
182
183	/*
184	* How many vmemmap pages associated with a HugeTLB page that can be freed
185	* to the buddy allocator.
186	*
187	* Todo: Returns zero for now, which means the feature is disabled. We will
188	* enable it once all the infrastructure is there.
189	*/
190	static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
191	{
192	return 0;
193	}
194
195	static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h)
196	{
197	return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
198	}
199
	200	void free_huge_page_vmemmap(struct hstate *h, struct page *head)
201	{
202	unsigned long vmemmap_addr = (unsigned long)head;
203	unsigned long vmemmap_end, vmemmap_reuse;
204
205	if (!free_vmemmap_pages_per_hpage(h))
206	return;
207
208	vmemmap_addr += RESERVE_VMEMMAP_SIZE;
209	vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h);
210	vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
211
212	/*
213	* Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end)
214	* to the page which @vmemmap_reuse is mapped to, then free the pages
215	* which the range [@vmemmap_addr, @vmemmap_end] is mapped to.
216	*/
217	vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse);
218	}