]>
Commit | Line | Data |
---|---|---|
901d209a JG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
b5eafe92 JF |
3 | /* |
4 | * Xen leaves the responsibility for maintaining p2m mappings to the | |
5 | * guests themselves, but it must also access and update the p2m array | |
6 | * during suspend/resume when all the pages are reallocated. | |
7 | * | |
054954eb JG |
8 | * The logical flat p2m table is mapped to a linear kernel memory area. |
9 | * For accesses by Xen a three-level tree linked via mfns only is set up to | |
10 | * allow the address space to be sparse. | |
b5eafe92 | 11 | * |
054954eb JG |
12 | * Xen |
13 | * | | |
14 | * p2m_top_mfn | |
15 | * / \ | |
16 | * p2m_mid_mfn p2m_mid_mfn | |
17 | * / / | |
18 | * p2m p2m p2m ... | |
b5eafe92 JF |
19 | * |
20 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | |
21 | * | |
054954eb JG |
22 | * The p2m_top_mfn level is limited to 1 page, so the maximum representable |
23 | * pseudo-physical address space is: | |
b5eafe92 JF |
24 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
25 | * | |
26 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | |
27 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | |
a3118beb | 28 | * 512 and 1024 entries respectively. |
f4cec35b KRW |
29 | * |
30 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. | |
31 | * | |
32 | * However not all entries are filled with MFNs. Specifically for all other | |
33 | * leaf entries, or for the top root, or middle one, for which there is a void | |
34 | * entry, we assume it is "missing". So (for example) | |
35 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | |
054954eb JG |
36 | * We have a dedicated page p2m_missing with all entries being |
37 | * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | |
38 | * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | |
f4cec35b KRW |
39 | * |
40 | * We also have the possibility of setting 1-1 mappings on certain regions, so | |
41 | * that: | |
42 | * pfn_to_mfn(0xc0000)=0xc0000 | |
43 | * | |
44 | * The benefit of this is, that we can assume for non-RAM regions (think | |
3cb83e46 | 45 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
f4cec35b KRW |
46 | * get the PFN value to match the MFN. |
47 | * | |
054954eb JG |
48 | * For this to work efficiently we have one new page p2m_identity. All entries |
49 | * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only | |
50 | * recognizes that and MFNs, no other fancy value). | |
f4cec35b KRW |
51 | * |
52 | * On lookup we spot that the entry points to p2m_identity and return the | |
53 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | |
54 | * If the entry points to an allocated page, we just proceed as before and | |
054954eb | 55 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in |
f4cec35b KRW |
56 | * appropriate functions (pfn_to_mfn). |
57 | * | |
58 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | |
59 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | |
60 | * non-identity pfn. To protect ourselves against this we elect to set (and get) the | |
61 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | |
b5eafe92 JF |
62 | */ |
63 | ||
64 | #include <linux/init.h> | |
7a2463dc | 65 | #include <linux/export.h> |
448f2831 JF |
66 | #include <linux/list.h> |
67 | #include <linux/hash.h> | |
87f1d40a | 68 | #include <linux/sched.h> |
2222e71b | 69 | #include <linux/seq_file.h> |
2013288f | 70 | #include <linux/memblock.h> |
7108c9ce | 71 | #include <linux/slab.h> |
d6472302 | 72 | #include <linux/vmalloc.h> |
b5eafe92 JF |
73 | |
74 | #include <asm/cache.h> | |
75 | #include <asm/setup.h> | |
7c0f6ba6 | 76 | #include <linux/uaccess.h> |
b5eafe92 JF |
77 | |
78 | #include <asm/xen/page.h> | |
79 | #include <asm/xen/hypercall.h> | |
80 | #include <asm/xen/hypervisor.h> | |
ee072640 | 81 | #include <xen/balloon.h> |
0930bba6 | 82 | #include <xen/grant_table.h> |
b5eafe92 | 83 | |
0930bba6 | 84 | #include "multicalls.h" |
b5eafe92 JF |
85 | #include "xen-ops.h" |
86 | ||
cb3eb850 JG |
/* Number of pointer-sized entries that fit in one mid-level / top-level page. */
87 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) |
88 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | |
89 | ||
/* Number of pfns representable by the three-level mfn tree (one top page). */
90 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | |
91 | ||
054954eb JG |
/* Number of PMDs set up per mid page when mapping the linear p2m area. */
92 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) |
93 | ||
5b8e7d80 JG |
/*
 * Linear p2m array: base address, number of entries that may be indexed
 * directly, and the upper pfn bound of the p2m area.
 */
94 | unsigned long *xen_p2m_addr __read_mostly; |
95 | EXPORT_SYMBOL_GPL(xen_p2m_addr); | |
96 | unsigned long xen_p2m_size __read_mostly; | |
97 | EXPORT_SYMBOL_GPL(xen_p2m_size); | |
b5eafe92 | 98 | unsigned long xen_max_p2m_pfn __read_mostly; |
5b8e7d80 | 99 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); |
b5eafe92 | 100 | |
633d6f17 JG |
/*
 * Optional lower bound for the size of the p2m virtual area; the value is
 * scaled by 1024^3 / PAGE_SIZE in xen_vmalloc_p2m_tree(). 0 means no limit.
 */
101 | #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT |
102 | #define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT | |
103 | #else | |
104 | #define P2M_LIMIT 0 | |
105 | #endif | |
106 | ||
054954eb JG |
/* Taken while a shared pte page or shared leaf page is replaced by a private one. */
107 | static DEFINE_SPINLOCK(p2m_update_lock); |
108 | ||
2c185687 JG |
/* mfn tree: shared "missing" mid page, top page of mfns, top page of pointers. */
109 | static unsigned long *p2m_mid_missing_mfn; |
110 | static unsigned long *p2m_top_mfn; | |
111 | static unsigned long **p2m_top_mfn_p; | |
054954eb JG |
/* Shared leaf pages (all entries INVALID_P2M_ENTRY) and the pte pages mapping them. */
112 | static unsigned long *p2m_missing; |
113 | static unsigned long *p2m_identity; | |
114 | static pte_t *p2m_missing_pte; | |
115 | static pte_t *p2m_identity_pte; | |
7108c9ce | 116 | |
98dd166e DV |
117 | /* |
118 | * Hint at last populated PFN. | |
119 | * | |
120 | * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack | |
121 | * can avoid scanning the whole P2M (which may be sized to account for | |
122 | * hotplugged memory). | |
123 | */ | |
124 | static unsigned long xen_p2m_last_pfn; | |
125 | ||
b5eafe92 JF |
126 | static inline unsigned p2m_top_index(unsigned long pfn) |
127 | { | |
128 | BUG_ON(pfn >= MAX_P2M_PFN); | |
129 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | |
130 | } | |
131 | ||
132 | static inline unsigned p2m_mid_index(unsigned long pfn) | |
133 | { | |
134 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | |
135 | } | |
136 | ||
137 | static inline unsigned p2m_index(unsigned long pfn) | |
138 | { | |
139 | return pfn % P2M_PER_PAGE; | |
140 | } | |
141 | ||
b5eafe92 JF |
142 | static void p2m_top_mfn_init(unsigned long *top) |
143 | { | |
144 | unsigned i; | |
145 | ||
146 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
147 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | |
148 | } | |
149 | ||
150 | static void p2m_top_mfn_p_init(unsigned long **top) | |
151 | { | |
152 | unsigned i; | |
153 | ||
154 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
155 | top[i] = p2m_mid_missing_mfn; | |
156 | } | |
157 | ||
054954eb | 158 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
b5eafe92 JF |
159 | { |
160 | unsigned i; | |
161 | ||
162 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | |
054954eb | 163 | mid[i] = virt_to_mfn(leaf); |
b5eafe92 JF |
164 | } |
165 | ||
054954eb | 166 | static void p2m_init(unsigned long *p2m) |
b5eafe92 JF |
167 | { |
168 | unsigned i; | |
169 | ||
054954eb JG |
170 | for (i = 0; i < P2M_PER_PAGE; i++) |
171 | p2m[i] = INVALID_P2M_ENTRY; | |
b5eafe92 JF |
172 | } |
173 | ||
054954eb | 174 | static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) |
b5eafe92 JF |
175 | { |
176 | unsigned i; | |
177 | ||
054954eb JG |
178 | for (i = 0; i < P2M_PER_PAGE; i++) |
179 | p2m[i] = IDENTITY_FRAME(pfn + i); | |
b5eafe92 JF |
180 | } |
181 | ||
7108c9ce JG |
182 | static void * __ref alloc_p2m_page(void) |
183 | { | |
8a7f97b9 MR |
184 | if (unlikely(!slab_is_available())) { |
185 | void *ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE); | |
186 | ||
187 | if (!ptr) | |
188 | panic("%s: Failed to allocate %lu bytes align=0x%lx\n", | |
189 | __func__, PAGE_SIZE, PAGE_SIZE); | |
190 | ||
191 | return ptr; | |
192 | } | |
7108c9ce | 193 | |
32d6bd90 | 194 | return (void *)__get_free_page(GFP_KERNEL); |
7108c9ce JG |
195 | } |
196 | ||
701a261a | 197 | static void __ref free_p2m_page(void *p) |
7108c9ce | 198 | { |
701a261a | 199 | if (unlikely(!slab_is_available())) { |
2013288f | 200 | memblock_free((unsigned long)p, PAGE_SIZE); |
701a261a BO |
201 | return; |
202 | } | |
203 | ||
7108c9ce JG |
204 | free_page((unsigned long)p); |
205 | } | |
206 | ||
b5eafe92 JF |
207 | /* |
208 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | |
209 | * | |
210 | * This is called both at boot time, and after resuming from suspend: | |
2c185687 | 211 | * - At boot time we're called rather early, and must use alloc_bootmem*() |
b5eafe92 JF |
212 | * to allocate memory. |
213 | * | |
214 | * - After resume we're called from within stop_machine, but the mfn | |
2c185687 | 215 | * tree should already be completely allocated. |
b5eafe92 | 216 | */ |
44b46c3e | 217 | void __ref xen_build_mfn_list_list(void) |
b5eafe92 | 218 | { |
054954eb JG |
219 | unsigned long pfn, mfn; |
220 | pte_t *ptep; | |
221 | unsigned int level, topidx, mididx; | |
222 | unsigned long *mid_mfn_p; | |
b5eafe92 | 223 | |
82616f95 | 224 | if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) |
696fd7c5 KRW |
225 | return; |
226 | ||
b5eafe92 JF |
227 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
228 | if (p2m_top_mfn == NULL) { | |
7108c9ce | 229 | p2m_mid_missing_mfn = alloc_p2m_page(); |
3cb83e46 | 230 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
b5eafe92 | 231 | |
7108c9ce | 232 | p2m_top_mfn_p = alloc_p2m_page(); |
b5eafe92 JF |
233 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
234 | ||
7108c9ce | 235 | p2m_top_mfn = alloc_p2m_page(); |
b5eafe92 JF |
236 | p2m_top_mfn_init(p2m_top_mfn); |
237 | } else { | |
238 | /* Reinitialise, mfn's all change after migration */ | |
3cb83e46 | 239 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
b5eafe92 JF |
240 | } |
241 | ||
054954eb JG |
242 | for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; |
243 | pfn += P2M_PER_PAGE) { | |
244 | topidx = p2m_top_index(pfn); | |
245 | mididx = p2m_mid_index(pfn); | |
b5eafe92 | 246 | |
b5eafe92 | 247 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
054954eb JG |
248 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), |
249 | &level); | |
250 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
251 | mfn = pte_mfn(*ptep); | |
252 | ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
b5eafe92 JF |
253 | |
254 | /* Don't bother allocating any mfn mid levels if | |
255 | * they're just missing, just update the stored mfn, | |
256 | * since all could have changed over a migrate. | |
257 | */ | |
054954eb | 258 | if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { |
b5eafe92 JF |
259 | BUG_ON(mididx); |
260 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | |
261 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | |
262 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | |
263 | continue; | |
264 | } | |
265 | ||
266 | if (mid_mfn_p == p2m_mid_missing_mfn) { | |
7108c9ce | 267 | mid_mfn_p = alloc_p2m_page(); |
3cb83e46 | 268 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
b5eafe92 JF |
269 | |
270 | p2m_top_mfn_p[topidx] = mid_mfn_p; | |
271 | } | |
272 | ||
273 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | |
054954eb | 274 | mid_mfn_p[mididx] = mfn; |
b5eafe92 JF |
275 | } |
276 | } | |
277 | ||
278 | void xen_setup_mfn_list_list(void) | |
279 | { | |
280 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | |
281 | ||
d51e8b3e JG |
282 | if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) |
283 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL; | |
284 | else | |
285 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
286 | virt_to_mfn(p2m_top_mfn); | |
98dd166e | 287 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; |
4b9c9a11 JG |
288 | HYPERVISOR_shared_info->arch.p2m_generation = 0; |
289 | HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr; | |
290 | HYPERVISOR_shared_info->arch.p2m_cr3 = | |
291 | xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | |
b5eafe92 JF |
292 | } |
293 | ||
294 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | |
295 | void __init xen_build_dynamic_phys_to_machine(void) | |
296 | { | |
b5eafe92 JF |
297 | unsigned long pfn; |
298 | ||
5b8e7d80 | 299 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; |
054954eb | 300 | xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); |
b5eafe92 | 301 | |
054954eb JG |
302 | for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) |
303 | xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; | |
b5eafe92 | 304 | |
054954eb JG |
305 | xen_max_p2m_pfn = xen_p2m_size; |
306 | } | |
b5eafe92 | 307 | |
054954eb JG |
308 | #define P2M_TYPE_IDENTITY 0 |
309 | #define P2M_TYPE_MISSING 1 | |
310 | #define P2M_TYPE_PFN 2 | |
311 | #define P2M_TYPE_UNKNOWN 3 | |
b5eafe92 | 312 | |
054954eb JG |
313 | static int xen_p2m_elem_type(unsigned long pfn) |
314 | { | |
315 | unsigned long mfn; | |
b5eafe92 | 316 | |
054954eb JG |
317 | if (pfn >= xen_p2m_size) |
318 | return P2M_TYPE_IDENTITY; | |
b5eafe92 | 319 | |
054954eb | 320 | mfn = xen_p2m_addr[pfn]; |
b5eafe92 | 321 | |
054954eb JG |
322 | if (mfn == INVALID_P2M_ENTRY) |
323 | return P2M_TYPE_MISSING; | |
cf04d120 | 324 | |
054954eb JG |
325 | if (mfn & IDENTITY_FRAME_BIT) |
326 | return P2M_TYPE_IDENTITY; | |
327 | ||
328 | return P2M_TYPE_PFN; | |
b5eafe92 | 329 | } |
054954eb JG |
330 | |
331 | static void __init xen_rebuild_p2m_list(unsigned long *p2m) | |
357a3cfb | 332 | { |
054954eb | 333 | unsigned int i, chunk; |
357a3cfb | 334 | unsigned long pfn; |
054954eb JG |
335 | unsigned long *mfns; |
336 | pte_t *ptep; | |
337 | pmd_t *pmdp; | |
338 | int type; | |
357a3cfb | 339 | |
054954eb JG |
340 | p2m_missing = alloc_p2m_page(); |
341 | p2m_init(p2m_missing); | |
342 | p2m_identity = alloc_p2m_page(); | |
343 | p2m_init(p2m_identity); | |
b5eafe92 | 344 | |
054954eb JG |
345 | p2m_missing_pte = alloc_p2m_page(); |
346 | paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); | |
347 | p2m_identity_pte = alloc_p2m_page(); | |
348 | paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); | |
349 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
350 | set_pte(p2m_missing_pte + i, | |
2e917175 | 351 | pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); |
054954eb | 352 | set_pte(p2m_identity_pte + i, |
2e917175 | 353 | pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); |
054954eb | 354 | } |
357a3cfb | 355 | |
054954eb JG |
356 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { |
357 | /* | |
358 | * Try to map missing/identity PMDs or p2m-pages if possible. | |
359 | * We have to respect the structure of the mfn_list_list | |
360 | * which will be built just afterwards. | |
361 | * Chunk size to test is one p2m page if we are in the middle | |
362 | * of a mfn_list_list mid page and the complete mid page area | |
363 | * if we are at index 0 of the mid page. Please note that a | |
364 | * mid page might cover more than one PMD, e.g. on 32 bit PAE | |
365 | * kernels. | |
366 | */ | |
367 | chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? | |
368 | P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; | |
369 | ||
370 | type = xen_p2m_elem_type(pfn); | |
371 | i = 0; | |
372 | if (type != P2M_TYPE_PFN) | |
373 | for (i = 1; i < chunk; i++) | |
374 | if (xen_p2m_elem_type(pfn + i) != type) | |
375 | break; | |
376 | if (i < chunk) | |
377 | /* Reset to minimal chunk size. */ | |
378 | chunk = P2M_PER_PAGE; | |
379 | ||
380 | if (type == P2M_TYPE_PFN || i < chunk) { | |
381 | /* Use initial p2m page contents. */ | |
382 | #ifdef CONFIG_X86_64 | |
383 | mfns = alloc_p2m_page(); | |
384 | copy_page(mfns, xen_p2m_addr + pfn); | |
385 | #else | |
386 | mfns = xen_p2m_addr + pfn; | |
387 | #endif | |
388 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
389 | set_pte(ptep, | |
390 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | |
357a3cfb | 391 | continue; |
054954eb | 392 | } |
b5eafe92 | 393 | |
054954eb JG |
394 | if (chunk == P2M_PER_PAGE) { |
395 | /* Map complete missing or identity p2m-page. */ | |
396 | mfns = (type == P2M_TYPE_MISSING) ? | |
397 | p2m_missing : p2m_identity; | |
398 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
399 | set_pte(ptep, | |
2e917175 | 400 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); |
357a3cfb | 401 | continue; |
054954eb | 402 | } |
357a3cfb | 403 | |
054954eb JG |
404 | /* Complete missing or identity PMD(s) can be mapped. */ |
405 | ptep = (type == P2M_TYPE_MISSING) ? | |
406 | p2m_missing_pte : p2m_identity_pte; | |
407 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
408 | pmdp = populate_extra_pmd( | |
82c92ed1 | 409 | (unsigned long)(p2m + pfn) + i * PMD_SIZE); |
054954eb JG |
410 | set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); |
411 | } | |
412 | } | |
413 | } | |
357a3cfb | 414 | |
054954eb JG |
415 | void __init xen_vmalloc_p2m_tree(void) |
416 | { | |
417 | static struct vm_struct vm; | |
633d6f17 | 418 | unsigned long p2m_limit; |
357a3cfb | 419 | |
98dd166e DV |
420 | xen_p2m_last_pfn = xen_max_p2m_pfn; |
421 | ||
633d6f17 | 422 | p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; |
054954eb | 423 | vm.flags = VM_ALLOC; |
633d6f17 | 424 | vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), |
054954eb JG |
425 | PMD_SIZE * PMDS_PER_MID_PAGE); |
426 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | |
427 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | |
3fc509fc | 428 | |
054954eb | 429 | xen_max_p2m_pfn = vm.size / sizeof(unsigned long); |
357a3cfb | 430 | |
054954eb | 431 | xen_rebuild_p2m_list(vm.addr); |
357a3cfb | 432 | |
054954eb | 433 | xen_p2m_addr = vm.addr; |
5b8e7d80 | 434 | xen_p2m_size = xen_max_p2m_pfn; |
5b8e7d80 JG |
435 | |
436 | xen_inv_extra_mem(); | |
357a3cfb | 437 | } |
054954eb | 438 | |
b5eafe92 JF |
439 | unsigned long get_phys_to_machine(unsigned long pfn) |
440 | { | |
054954eb JG |
441 | pte_t *ptep; |
442 | unsigned int level; | |
b5eafe92 | 443 | |
5b8e7d80 JG |
444 | if (unlikely(pfn >= xen_p2m_size)) { |
445 | if (pfn < xen_max_p2m_pfn) | |
446 | return xen_chk_extra_mem(pfn); | |
447 | ||
25b884a8 | 448 | return IDENTITY_FRAME(pfn); |
5b8e7d80 | 449 | } |
b5eafe92 | 450 | |
054954eb JG |
451 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
452 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
b5eafe92 | 453 | |
f4cec35b KRW |
454 | /* |
455 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | |
456 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | |
457 | * would be wrong. | |
458 | */ | |
054954eb | 459 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
f4cec35b KRW |
460 | return IDENTITY_FRAME(pfn); |
461 | ||
054954eb | 462 | return xen_p2m_addr[pfn]; |
b5eafe92 JF |
463 | } |
464 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | |
465 | ||
054954eb JG |
466 | /* |
467 | * Allocate new pmd(s). It is checked whether the old pmd is still in place. | |
468 | * If not, nothing is changed. This is okay as the only reason for allocating | |
469 | * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual | |
470 | * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! | |
471 | */ | |
f241b0b8 | 472 | static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) |
054954eb JG |
473 | { |
474 | pte_t *ptechk; | |
054954eb JG |
475 | pte_t *pte_newpg[PMDS_PER_MID_PAGE]; |
476 | pmd_t *pmdp; | |
477 | unsigned int level; | |
478 | unsigned long flags; | |
479 | unsigned long vaddr; | |
480 | int i; | |
481 | ||
482 | /* Do all allocations first to bail out in error case. */ | |
483 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
484 | pte_newpg[i] = alloc_p2m_page(); | |
485 | if (!pte_newpg[i]) { | |
486 | for (i--; i >= 0; i--) | |
487 | free_p2m_page(pte_newpg[i]); | |
488 | ||
489 | return NULL; | |
490 | } | |
491 | } | |
492 | ||
493 | vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); | |
494 | ||
495 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
496 | copy_page(pte_newpg[i], pte_pg); | |
497 | paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); | |
498 | ||
499 | pmdp = lookup_pmd_address(vaddr); | |
500 | BUG_ON(!pmdp); | |
501 | ||
502 | spin_lock_irqsave(&p2m_update_lock, flags); | |
503 | ||
504 | ptechk = lookup_address(vaddr, &level); | |
505 | if (ptechk == pte_pg) { | |
4b9c9a11 JG |
506 | HYPERVISOR_shared_info->arch.p2m_generation++; |
507 | wmb(); /* Tools are synchronizing via p2m_generation. */ | |
054954eb JG |
508 | set_pmd(pmdp, |
509 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | |
4b9c9a11 JG |
510 | wmb(); /* Tools are synchronizing via p2m_generation. */ |
511 | HYPERVISOR_shared_info->arch.p2m_generation++; | |
054954eb JG |
512 | pte_newpg[i] = NULL; |
513 | } | |
514 | ||
515 | spin_unlock_irqrestore(&p2m_update_lock, flags); | |
516 | ||
517 | if (pte_newpg[i]) { | |
518 | paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); | |
519 | free_p2m_page(pte_newpg[i]); | |
520 | } | |
521 | ||
522 | vaddr += PMD_SIZE; | |
523 | } | |
524 | ||
f241b0b8 | 525 | return lookup_address(addr, &level); |
054954eb JG |
526 | } |
527 | ||
a3118beb | 528 | /* |
b5eafe92 JF |
529 | * Fully allocate the p2m structure for a given pfn. We need to check |
530 | * that both the top and mid levels are allocated, and make sure the | |
531 | * parallel mfn tree is kept in sync. We may race with other cpus, so | |
532 | * the new pages are installed with cmpxchg; if we lose the race then | |
533 | * simply free the page we allocated and use the one that's there. | |
534 | */ | |
8edfcf88 | 535 | int xen_alloc_p2m_entry(unsigned long pfn) |
b5eafe92 | 536 | { |
c70727a5 | 537 | unsigned topidx; |
b5eafe92 | 538 | unsigned long *top_mfn_p, *mid_mfn; |
054954eb JG |
539 | pte_t *ptep, *pte_pg; |
540 | unsigned int level; | |
541 | unsigned long flags; | |
542 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | |
543 | unsigned long p2m_pfn; | |
b5eafe92 | 544 | |
054954eb JG |
545 | ptep = lookup_address(addr, &level); |
546 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
547 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
b5eafe92 | 548 | |
054954eb JG |
549 | if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { |
550 | /* PMD level is missing, allocate a new one */ | |
f241b0b8 | 551 | ptep = alloc_p2m_pmd(addr, pte_pg); |
054954eb | 552 | if (!ptep) |
8edfcf88 | 553 | return -ENOMEM; |
b5eafe92 JF |
554 | } |
555 | ||
c70727a5 JG |
556 | if (p2m_top_mfn && pfn < MAX_P2M_PFN) { |
557 | topidx = p2m_top_index(pfn); | |
054954eb | 558 | top_mfn_p = &p2m_top_mfn[topidx]; |
6aa7de05 | 559 | mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]); |
b5eafe92 | 560 | |
054954eb | 561 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
b5eafe92 | 562 | |
054954eb JG |
563 | if (mid_mfn == p2m_mid_missing_mfn) { |
564 | /* Separately check the mid mfn level */ | |
565 | unsigned long missing_mfn; | |
566 | unsigned long mid_mfn_mfn; | |
567 | unsigned long old_mfn; | |
b5eafe92 | 568 | |
054954eb JG |
569 | mid_mfn = alloc_p2m_page(); |
570 | if (!mid_mfn) | |
8edfcf88 | 571 | return -ENOMEM; |
b5eafe92 | 572 | |
054954eb | 573 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
b5eafe92 | 574 | |
054954eb JG |
575 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
576 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | |
577 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | |
578 | if (old_mfn != missing_mfn) { | |
579 | free_p2m_page(mid_mfn); | |
580 | mid_mfn = mfn_to_virt(old_mfn); | |
581 | } else { | |
582 | p2m_top_mfn_p[topidx] = mid_mfn; | |
583 | } | |
239af7c7 | 584 | } |
054954eb JG |
585 | } else { |
586 | mid_mfn = NULL; | |
b5eafe92 JF |
587 | } |
588 | ||
1760f1eb | 589 | p2m_pfn = pte_pfn(READ_ONCE(*ptep)); |
054954eb JG |
590 | if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || |
591 | p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | |
b5eafe92 JF |
592 | /* p2m leaf page is missing */ |
593 | unsigned long *p2m; | |
594 | ||
595 | p2m = alloc_p2m_page(); | |
596 | if (!p2m) | |
8edfcf88 | 597 | return -ENOMEM; |
b5eafe92 | 598 | |
054954eb JG |
599 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
600 | p2m_init(p2m); | |
601 | else | |
b8f05c88 | 602 | p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1)); |
054954eb JG |
603 | |
604 | spin_lock_irqsave(&p2m_update_lock, flags); | |
605 | ||
606 | if (pte_pfn(*ptep) == p2m_pfn) { | |
4b9c9a11 JG |
607 | HYPERVISOR_shared_info->arch.p2m_generation++; |
608 | wmb(); /* Tools are synchronizing via p2m_generation. */ | |
054954eb JG |
609 | set_pte(ptep, |
610 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | |
4b9c9a11 JG |
611 | wmb(); /* Tools are synchronizing via p2m_generation. */ |
612 | HYPERVISOR_shared_info->arch.p2m_generation++; | |
054954eb | 613 | if (mid_mfn) |
c70727a5 | 614 | mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m); |
054954eb JG |
615 | p2m = NULL; |
616 | } | |
617 | ||
618 | spin_unlock_irqrestore(&p2m_update_lock, flags); | |
b5eafe92 | 619 | |
054954eb | 620 | if (p2m) |
b5eafe92 | 621 | free_p2m_page(p2m); |
b5eafe92 JF |
622 | } |
623 | ||
98dd166e DV |
624 | /* Expanded the p2m? */ |
625 | if (pfn > xen_p2m_last_pfn) { | |
626 | xen_p2m_last_pfn = pfn; | |
627 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; | |
628 | } | |
629 | ||
8edfcf88 | 630 | return 0; |
b5eafe92 | 631 | } |
8edfcf88 | 632 | EXPORT_SYMBOL(xen_alloc_p2m_entry); |
b5eafe92 | 633 | |
b83c6e55 | 634 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
f4cec35b KRW |
635 | unsigned long pfn_e) |
636 | { | |
637 | unsigned long pfn; | |
638 | ||
5b8e7d80 | 639 | if (unlikely(pfn_s >= xen_p2m_size)) |
f4cec35b KRW |
640 | return 0; |
641 | ||
f4cec35b KRW |
642 | if (pfn_s > pfn_e) |
643 | return 0; | |
644 | ||
5b8e7d80 JG |
645 | if (pfn_e > xen_p2m_size) |
646 | pfn_e = xen_p2m_size; | |
f4cec35b | 647 | |
5b8e7d80 JG |
648 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
649 | xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); | |
f4cec35b KRW |
650 | |
651 | return pfn - pfn_s; | |
652 | } | |
653 | ||
b5eafe92 JF |
654 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
655 | { | |
054954eb JG |
656 | pte_t *ptep; |
657 | unsigned int level; | |
b5eafe92 | 658 | |
5b8e7d80 | 659 | if (unlikely(pfn >= xen_p2m_size)) { |
b5eafe92 JF |
660 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
661 | return true; | |
662 | } | |
663 | ||
4b9c9a11 JG |
664 | /* |
665 | * The interface requires atomic updates on p2m elements. | |
1457d8cf | 666 | * xen_safe_write_ulong() is using an atomic store via asm(). |
4b9c9a11 | 667 | */ |
90fff3ea | 668 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) |
2e917175 JG |
669 | return true; |
670 | ||
054954eb JG |
671 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
672 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
f4cec35b | 673 | |
054954eb | 674 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) |
b5eafe92 JF |
675 | return mfn == INVALID_P2M_ENTRY; |
676 | ||
054954eb JG |
677 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
678 | return mfn == IDENTITY_FRAME(pfn); | |
679 | ||
2e917175 | 680 | return false; |
b5eafe92 JF |
681 | } |
682 | ||
683 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |
684 | { | |
054954eb | 685 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
8edfcf88 DV |
686 | int ret; |
687 | ||
688 | ret = xen_alloc_p2m_entry(pfn); | |
689 | if (ret < 0) | |
b5eafe92 JF |
690 | return false; |
691 | ||
054954eb | 692 | return __set_phys_to_machine(pfn, mfn); |
b5eafe92 JF |
693 | } |
694 | ||
695 | return true; | |
696 | } | |
448f2831 | 697 | |
820c4db2 JG |
698 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
699 | struct gnttab_map_grant_ref *kmap_ops, | |
700 | struct page **pages, unsigned int count) | |
1429d46d ZK |
701 | { |
702 | int i, ret = 0; | |
820c4db2 | 703 | pte_t *pte; |
1429d46d | 704 | |
781198f1 SG |
705 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
706 | return 0; | |
707 | ||
0bb599fd DV |
708 | if (kmap_ops) { |
709 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
710 | kmap_ops, count); | |
711 | if (ret) | |
712 | goto out; | |
1429d46d ZK |
713 | } |
714 | ||
715 | for (i = 0; i < count; i++) { | |
820c4db2 | 716 | unsigned long mfn, pfn; |
1429d46d | 717 | |
820c4db2 JG |
718 | /* Do not add to override if the map failed. */ |
719 | if (map_ops[i].status) | |
720 | continue; | |
721 | ||
722 | if (map_ops[i].flags & GNTMAP_contains_pte) { | |
723 | pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | |
724 | (map_ops[i].host_addr & ~PAGE_MASK)); | |
725 | mfn = pte_mfn(*pte); | |
726 | } else { | |
727 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | |
1429d46d | 728 | } |
820c4db2 | 729 | pfn = page_to_pfn(pages[i]); |
1429d46d | 730 | |
0ae65f49 JH |
731 | WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); |
732 | ||
820c4db2 JG |
733 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { |
734 | ret = -ENOMEM; | |
1429d46d | 735 | goto out; |
820c4db2 | 736 | } |
1429d46d ZK |
737 | } |
738 | ||
739 | out: | |
1429d46d ZK |
740 | return ret; |
741 | } | |
820c4db2 | 742 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); |
1429d46d | 743 | |
820c4db2 | 744 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
853d0289 | 745 | struct gnttab_unmap_grant_ref *kunmap_ops, |
820c4db2 | 746 | struct page **pages, unsigned int count) |
448f2831 | 747 | { |
820c4db2 | 748 | int i, ret = 0; |
448f2831 | 749 | |
781198f1 SG |
750 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
751 | return 0; | |
752 | ||
820c4db2 | 753 | for (i = 0; i < count; i++) { |
0aad5689 | 754 | unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); |
820c4db2 JG |
755 | unsigned long pfn = page_to_pfn(pages[i]); |
756 | ||
757 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | |
758 | ret = -EINVAL; | |
759 | goto out; | |
448f2831 | 760 | } |
448f2831 | 761 | |
0ae65f49 | 762 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
820c4db2 | 763 | } |
0bb599fd DV |
764 | if (kunmap_ops) |
765 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |
766 | kunmap_ops, count); | |
820c4db2 | 767 | out: |
448f2831 JF |
768 | return ret; |
769 | } | |
820c4db2 | 770 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); |
448f2831 | 771 | |
2222e71b | 772 | #ifdef CONFIG_XEN_DEBUG_FS |
a867db10 KRW |
773 | #include <linux/debugfs.h> |
774 | #include "debugfs.h" | |
775 | static int p2m_dump_show(struct seq_file *m, void *v) | |
2222e71b | 776 | { |
a491dbef | 777 | static const char * const type_name[] = { |
054954eb JG |
778 | [P2M_TYPE_IDENTITY] = "identity", |
779 | [P2M_TYPE_MISSING] = "missing", | |
780 | [P2M_TYPE_PFN] = "pfn", | |
781 | [P2M_TYPE_UNKNOWN] = "abnormal"}; | |
782 | unsigned long pfn, first_pfn; | |
783 | int type, prev_type; | |
784 | ||
785 | prev_type = xen_p2m_elem_type(0); | |
786 | first_pfn = 0; | |
787 | ||
788 | for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
789 | type = xen_p2m_elem_type(pfn); | |
790 | if (type != prev_type) { | |
791 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | |
792 | type_name[prev_type]); | |
2222e71b | 793 | prev_type = type; |
054954eb | 794 | first_pfn = pfn; |
2222e71b KRW |
795 | } |
796 | } | |
054954eb JG |
797 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, |
798 | type_name[prev_type]); | |
2222e71b | 799 | return 0; |
2222e71b | 800 | } |
a867db10 KRW |
801 | |
802 | static int p2m_dump_open(struct inode *inode, struct file *filp) | |
803 | { | |
804 | return single_open(filp, p2m_dump_show, NULL); | |
805 | } | |
806 | ||
807 | static const struct file_operations p2m_dump_fops = { | |
808 | .open = p2m_dump_open, | |
809 | .read = seq_read, | |
810 | .llseek = seq_lseek, | |
811 | .release = single_release, | |
812 | }; | |
813 | ||
814 | static struct dentry *d_mmu_debug; | |
815 | ||
816 | static int __init xen_p2m_debugfs(void) | |
817 | { | |
818 | struct dentry *d_xen = xen_init_debugfs(); | |
819 | ||
a867db10 KRW |
820 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); |
821 | ||
822 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | |
823 | return 0; | |
824 | } | |
825 | fs_initcall(xen_p2m_debugfs); | |
826 | #endif /* CONFIG_XEN_DEBUG_FS */ |