/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010 SUSE Linux Products GmbH
 * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */
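
/*
 * In rough outline, populating a chunk walks the helpers below in
 * order (see pcpu_populate_chunk()):
 *
 *	pages = pcpu_get_pages();
 *	pcpu_alloc_pages(chunk, pages, page_start, page_end);
 *	pcpu_map_pages(chunk, pages, page_start, page_end);
 *	pcpu_post_map_flush(chunk, page_start, page_end);
 *
 * Depopulation runs flush, unmap and free in the reverse direction
 * (see pcpu_depopulate_chunk()).  Error handling is omitted above.
 */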

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 *
 * Returns a pointer to an array of pointers to struct page which can
 * be indexed with pcpu_page_idx().  Note that there is only one array
 * and accesses should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(void)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

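	/* allocated once on first use and intentionally never freed */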
	if (!pages)
		pages = pcpu_mem_zalloc(pages_size);
	return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

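			/* skip any entries that were never populated */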
			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocate pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start, @page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu, tcpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
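	/*
	 * Roll back: first free the pages already allocated for the
	 * failing cpu, then everything allocated for the cpus that
	 * completed before it.
	 */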
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start, @page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flush can be very expensive, flush
 * the whole region at once rather than doing it for each cpu.  This
 * may be overkill but is more scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
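
/*
 * pcpu_low_unit_cpu and pcpu_high_unit_cpu bracket the unit address
 * range, so a single flush between them covers the region for every
 * cpu at once.  pcpu_post_unmap_tlb_flush() and pcpu_post_map_flush()
 * below rely on the same trick.
 */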

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}
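
/*
 * unmap_kernel_range_noflush() only tears down the page table
 * entries; cache and TLB flushing are left to the callers via the
 * pre/post flush helpers above and below.
 */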

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start, @page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start, @page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}
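
/*
 * Like its unmap counterpart, map_kernel_range_noflush() installs the
 * page table entries without flushing; pcpu_post_map_flush() must be
 * called once all mappings are in place.
 */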

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start, @page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
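	/*
	 * Flush the TLB even though the failed mappings were never
	 * used: stale entries left by the partial unmap above would be
	 * extremely hard to track down if they ever surfaced.
	 */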
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start, @page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, populate and map pages [@page_start, @page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end)
{
	struct page **pages;

	pages = pcpu_get_pages();
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start, @page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages();
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
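	/*
	 * vms[0] is group 0's vm area, which pcpu_get_vm_areas() placed
	 * pcpu_group_offsets[0] bytes past the common base; subtract
	 * the offset to recover the chunk's base address.
	 */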
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(chunk->base_addr);

	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;

	pcpu_stats_chunk_dealloc();
	trace_percpu_destroy_chunk(chunk->base_addr);

	if (chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}